Repository: cloudflare/pingora
Branch: main
Commit: 82342e138c01
Files: 380
Total size: 3.0 MB

Directory structure:
gitextract__dwmq04u/

├── .bleep
├── .cargo/
│   ├── audit.toml
│   └── config.toml
├── .github/
│   ├── CONTRIBUTING.md
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   └── workflows/
│       ├── audit.yml
│       ├── build.yml
│       ├── docs.yml
│       ├── mark-stale.yaml
│       └── semgrep.yml
├── .gitignore
├── .rustfmt.toml
├── CHANGELOG.md
├── Cargo.toml
├── Dockerfile
├── LICENSE
├── README.md
├── cliff.toml
├── clippy.toml
├── docs/
│   ├── README.md
│   ├── quick_start.md
│   └── user_guide/
│       ├── conf.md
│       ├── ctx.md
│       ├── daemon.md
│       ├── error_log.md
│       ├── errors.md
│       ├── failover.md
│       ├── graceful.md
│       ├── index.md
│       ├── internals.md
│       ├── modify_filter.md
│       ├── panic.md
│       ├── peer.md
│       ├── phase.md
│       ├── phase_chart.md
│       ├── pooling.md
│       ├── prom.md
│       ├── rate_limiter.md
│       ├── start_stop.md
│       └── systemd.md
├── pingora/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── examples/
│   │   ├── app/
│   │   │   ├── echo.rs
│   │   │   ├── mod.rs
│   │   │   └── proxy.rs
│   │   ├── client.rs
│   │   ├── server.rs
│   │   └── service/
│   │       ├── echo.rs
│   │       ├── mod.rs
│   │       └── proxy.rs
│   ├── src/
│   │   └── lib.rs
│   └── tests/
│       └── pingora_conf.yaml
├── pingora-boringssl/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       ├── boring_tokio.rs
│       ├── ext.rs
│       └── lib.rs
├── pingora-cache/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── benches/
│   │   ├── lru_memory.rs
│   │   ├── lru_serde.rs
│   │   └── simple_lru_memory.rs
│   └── src/
│       ├── cache_control.rs
│       ├── eviction/
│       │   ├── lru.rs
│       │   ├── mod.rs
│       │   └── simple_lru.rs
│       ├── filters.rs
│       ├── hashtable.rs
│       ├── key.rs
│       ├── lib.rs
│       ├── lock.rs
│       ├── max_file_size.rs
│       ├── memory.rs
│       ├── meta.rs
│       ├── predictor.rs
│       ├── put.rs
│       ├── storage.rs
│       ├── trace.rs
│       └── variance.rs
├── pingora-core/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── examples/
│   │   ├── bootstrap_as_a_service.rs
│   │   ├── client_cert.rs
│   │   ├── keys/
│   │   │   ├── client-ca/
│   │   │   │   ├── cert.pem
│   │   │   │   └── key.pem
│   │   │   ├── clients/
│   │   │   │   ├── cert-1.pem
│   │   │   │   ├── cert-2.pem
│   │   │   │   ├── invalid-cert.pem
│   │   │   │   ├── invalid-key.pem
│   │   │   │   ├── key-1.pem
│   │   │   │   └── key-2.pem
│   │   │   └── server/
│   │   │       ├── cert.pem
│   │   │       └── key.pem
│   │   └── service_dependencies.rs
│   ├── src/
│   │   ├── apps/
│   │   │   ├── http_app.rs
│   │   │   ├── mod.rs
│   │   │   └── prometheus_http_app.rs
│   │   ├── connectors/
│   │   │   ├── http/
│   │   │   │   ├── custom/
│   │   │   │   │   └── mod.rs
│   │   │   │   ├── mod.rs
│   │   │   │   ├── v1.rs
│   │   │   │   └── v2.rs
│   │   │   ├── l4.rs
│   │   │   ├── mod.rs
│   │   │   ├── offload.rs
│   │   │   └── tls/
│   │   │       ├── boringssl_openssl/
│   │   │       │   └── mod.rs
│   │   │       ├── mod.rs
│   │   │       ├── rustls/
│   │   │       │   └── mod.rs
│   │   │       └── s2n/
│   │   │           └── mod.rs
│   │   ├── lib.rs
│   │   ├── listeners/
│   │   │   ├── connection_filter.rs
│   │   │   ├── l4.rs
│   │   │   ├── mod.rs
│   │   │   └── tls/
│   │   │       ├── boringssl_openssl/
│   │   │       │   └── mod.rs
│   │   │       ├── mod.rs
│   │   │       ├── rustls/
│   │   │       │   └── mod.rs
│   │   │       └── s2n/
│   │   │           └── mod.rs
│   │   ├── modules/
│   │   │   ├── http/
│   │   │   │   ├── compression.rs
│   │   │   │   ├── grpc_web.rs
│   │   │   │   └── mod.rs
│   │   │   └── mod.rs
│   │   ├── protocols/
│   │   │   ├── digest.rs
│   │   │   ├── http/
│   │   │   │   ├── body_buffer.rs
│   │   │   │   ├── bridge/
│   │   │   │   │   ├── grpc_web.rs
│   │   │   │   │   └── mod.rs
│   │   │   │   ├── client.rs
│   │   │   │   ├── compression/
│   │   │   │   │   ├── brotli.rs
│   │   │   │   │   ├── gzip.rs
│   │   │   │   │   ├── mod.rs
│   │   │   │   │   └── zstd.rs
│   │   │   │   ├── conditional_filter.rs
│   │   │   │   ├── custom/
│   │   │   │   │   ├── client.rs
│   │   │   │   │   ├── mod.rs
│   │   │   │   │   └── server.rs
│   │   │   │   ├── date.rs
│   │   │   │   ├── error_resp.rs
│   │   │   │   ├── mod.rs
│   │   │   │   ├── server.rs
│   │   │   │   ├── subrequest/
│   │   │   │   │   ├── body.rs
│   │   │   │   │   ├── dummy.rs
│   │   │   │   │   ├── mod.rs
│   │   │   │   │   └── server.rs
│   │   │   │   ├── v1/
│   │   │   │   │   ├── body.rs
│   │   │   │   │   ├── client.rs
│   │   │   │   │   ├── common.rs
│   │   │   │   │   ├── mod.rs
│   │   │   │   │   └── server.rs
│   │   │   │   └── v2/
│   │   │   │       ├── client.rs
│   │   │   │       ├── mod.rs
│   │   │   │       └── server.rs
│   │   │   ├── l4/
│   │   │   │   ├── ext.rs
│   │   │   │   ├── listener.rs
│   │   │   │   ├── mod.rs
│   │   │   │   ├── socket.rs
│   │   │   │   ├── stream.rs
│   │   │   │   └── virt.rs
│   │   │   ├── mod.rs
│   │   │   ├── raw_connect.rs
│   │   │   ├── tls/
│   │   │   │   ├── boringssl_openssl/
│   │   │   │   │   ├── client.rs
│   │   │   │   │   ├── mod.rs
│   │   │   │   │   ├── server.rs
│   │   │   │   │   └── stream.rs
│   │   │   │   ├── digest.rs
│   │   │   │   ├── mod.rs
│   │   │   │   ├── noop_tls/
│   │   │   │   │   └── mod.rs
│   │   │   │   ├── rustls/
│   │   │   │   │   ├── client.rs
│   │   │   │   │   ├── mod.rs
│   │   │   │   │   ├── server.rs
│   │   │   │   │   └── stream.rs
│   │   │   │   └── s2n/
│   │   │   │       ├── client.rs
│   │   │   │       ├── mod.rs
│   │   │   │       ├── server.rs
│   │   │   │       └── stream.rs
│   │   │   └── windows.rs
│   │   ├── server/
│   │   │   ├── bootstrap_services.rs
│   │   │   ├── configuration/
│   │   │   │   └── mod.rs
│   │   │   ├── daemon.rs
│   │   │   ├── mod.rs
│   │   │   └── transfer_fd/
│   │   │       └── mod.rs
│   │   ├── services/
│   │   │   ├── background.rs
│   │   │   ├── listening.rs
│   │   │   └── mod.rs
│   │   ├── tls/
│   │   │   └── mod.rs
│   │   ├── upstreams/
│   │   │   ├── mod.rs
│   │   │   └── peer.rs
│   │   └── utils/
│   │       ├── mod.rs
│   │       └── tls/
│   │           ├── boringssl_openssl.rs
│   │           ├── mod.rs
│   │           ├── rustls.rs
│   │           └── s2n.rs
│   └── tests/
│       ├── certs/
│       │   ├── alt-ca.crt
│       │   ├── alt-server.crt
│       │   ├── ca.crt
│       │   ├── server.crt
│       │   └── server.key
│       ├── keys/
│       │   ├── key.pem
│       │   ├── public.pem
│       │   ├── server.crt
│       │   └── server.csr
│       ├── nginx.conf
│       ├── nginx_proxy.conf
│       ├── pingora_conf.yaml
│       ├── server_phase_fastshutdown.rs
│       ├── server_phase_gracefulshutdown.rs
│       ├── test_basic.rs
│       └── utils/
│           └── mod.rs
├── pingora-error/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       ├── immut_str.rs
│       └── lib.rs
├── pingora-header-serde/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── samples/
│   │   └── test/
│   │       ├── 1
│   │       ├── 2
│   │       ├── 3
│   │       ├── 4
│   │       ├── 5
│   │       ├── 6
│   │       └── 7
│   └── src/
│       ├── dict.rs
│       ├── lib.rs
│       ├── thread_zstd.rs
│       └── trainer.rs
├── pingora-http/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       ├── case_header_name.rs
│       └── lib.rs
├── pingora-ketama/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── benches/
│   │   ├── memory.rs
│   │   └── simple.rs
│   ├── examples/
│   │   └── health_aware_selector.rs
│   ├── src/
│   │   └── lib.rs
│   ├── test-data/
│   │   ├── README.md
│   │   ├── nginx.conf
│   │   ├── sample-nginx-upstream.csv
│   │   └── trace.sh
│   └── tests/
│       ├── backwards_compat.rs
│       └── old_version/
│           └── mod.rs
├── pingora-limits/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── benches/
│   │   └── benchmark.rs
│   └── src/
│       ├── estimator.rs
│       ├── inflight.rs
│       ├── lib.rs
│       └── rate.rs
├── pingora-load-balancing/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       ├── background.rs
│       ├── discovery.rs
│       ├── health_check.rs
│       ├── lib.rs
│       └── selection/
│           ├── algorithms.rs
│           ├── consistent.rs
│           ├── mod.rs
│           └── weighted.rs
├── pingora-lru/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── benches/
│   │   ├── bench_linked_list.rs
│   │   └── bench_lru.rs
│   └── src/
│       ├── lib.rs
│       └── linked_list.rs
├── pingora-memory-cache/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       ├── lib.rs
│       └── read_through.rs
├── pingora-openssl/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       ├── ext.rs
│       └── lib.rs
├── pingora-pool/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       ├── connection.rs
│       ├── lib.rs
│       └── lru.rs
├── pingora-proxy/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── examples/
│   │   ├── backoff_retry.rs
│   │   ├── conf.yaml
│   │   ├── connection_filter.rs
│   │   ├── ctx.rs
│   │   ├── gateway.rs
│   │   ├── grpc_web_module.rs
│   │   ├── load_balancer.rs
│   │   ├── modify_response.rs
│   │   ├── multi_lb.rs
│   │   ├── rate_limiter.rs
│   │   ├── use_module.rs
│   │   └── virtual_l4.rs
│   ├── src/
│   │   ├── lib.rs
│   │   ├── proxy_cache.rs
│   │   ├── proxy_common.rs
│   │   ├── proxy_custom.rs
│   │   ├── proxy_h1.rs
│   │   ├── proxy_h2.rs
│   │   ├── proxy_purge.rs
│   │   ├── proxy_trait.rs
│   │   └── subrequest/
│   │       ├── mod.rs
│   │       └── pipe.rs
│   └── tests/
│       ├── headers.dict
│       ├── keys/
│       │   ├── key.pem
│       │   ├── public.pem
│       │   ├── server.crt
│       │   └── server.csr
│       ├── pingora_conf.yaml
│       ├── test_basic.rs
│       ├── test_upstream.rs
│       └── utils/
│           ├── cert.rs
│           ├── conf/
│           │   ├── keys/
│           │   │   ├── README.md
│           │   │   ├── ca1.crt
│           │   │   ├── ca1.key.pem
│           │   │   ├── ca2.crt
│           │   │   ├── ca_chain.cert
│           │   │   ├── ca_chain.srl
│           │   │   ├── cert_chain.crt
│           │   │   ├── curve_test.384.crt
│           │   │   ├── curve_test.384.key.pem
│           │   │   ├── curve_test.521.crt
│           │   │   ├── curve_test.521.key.pem
│           │   │   ├── ex1.crt
│           │   │   ├── ex1.key.b64
│           │   │   ├── intermediate.cnf
│           │   │   ├── intermediate.crt
│           │   │   ├── intermediate.csr
│           │   │   ├── intermediate.key
│           │   │   ├── intermediate.srl
│           │   │   ├── key.pem
│           │   │   ├── leaf.cnf
│           │   │   ├── leaf.crt
│           │   │   ├── leaf.csr
│           │   │   ├── leaf.key
│           │   │   ├── leaf.srl
│           │   │   ├── leaf2.crt
│           │   │   ├── leaf2.csr
│           │   │   ├── leaf2.key
│           │   │   ├── leaf2.srl
│           │   │   ├── public.pem
│           │   │   ├── root.crt
│           │   │   ├── root.key
│           │   │   ├── root.srl
│           │   │   ├── server.crt
│           │   │   ├── server_boringssl_openssl.crt
│           │   │   ├── server_boringssl_openssl.csr
│           │   │   ├── server_rustls.crt
│           │   │   ├── server_s2n.crt
│           │   │   └── v3.ext
│           │   └── origin/
│           │       ├── .gitignore
│           │       ├── conf/
│           │       │   └── nginx.conf
│           │       └── html/
│           │           └── index.html
│           ├── mock_origin.rs
│           ├── mod.rs
│           ├── server_utils.rs
│           └── websocket/
│               ├── mod.rs
│               ├── ws_echo.rs
│               └── ws_echo_raw.rs
├── pingora-runtime/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── benches/
│   │   └── hello.rs
│   └── src/
│       └── lib.rs
├── pingora-rustls/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       └── lib.rs
├── pingora-s2n/
│   ├── Cargo.toml
│   ├── LICENSE
│   └── src/
│       └── lib.rs
├── pingora-timeout/
│   ├── Cargo.toml
│   ├── LICENSE
│   ├── benches/
│   │   └── benchmark.rs
│   └── src/
│       ├── fast_timeout.rs
│       ├── lib.rs
│       └── timer.rs
└── tinyufo/
    ├── Cargo.toml
    ├── LICENSE
    ├── README.md
    ├── benches/
    │   ├── bench_hit_ratio.rs
    │   ├── bench_memory.rs
    │   └── bench_perf.rs
    └── src/
        ├── buckets.rs
        ├── estimation.rs
        └── lib.rs

================================================
FILE CONTENTS
================================================

================================================
FILE: .bleep
================================================
5a1cf681f7e2691687623b60387a88076493015f

================================================
FILE: .cargo/audit.toml
================================================
[advisories]
ignore = [
    # This came from the prometheus crate's protobuf encoder.
    # We don't use the protobuf encoder, only the text one.
    # https://rustsec.org/advisories/RUSTSEC-2024-0437
    "RUSTSEC-2024-0437",
]


================================================
FILE: .cargo/config.toml
================================================
[resolver]
incompatible-rust-versions = "fallback"

================================================
FILE: .github/CONTRIBUTING.md
================================================
# Contributing

Welcome to Pingora! Before you make a contribution, be it a bug report, documentation improvement,
pull request (PR), etc., please read and follow these guidelines.

## Start with filing an issue

More often than not, **start by filing an issue on GitHub**. If you have a bug report or feature
request, open a GitHub issue. Non-trivial PRs will also require a GitHub issue. The issue provides
us with a space to discuss proposed changes with you and the community.

Having a discussion via GitHub issue upfront is the best way to ensure your contribution lands in
Pingora. We don't want you to spend your time making a PR, only to find that we won't accept it on
a design basis. For example, we may find that your proposed feature works better as a third-party
module built on top of or for use with Pingora and encourage you to pursue that direction instead.

**You do not need to file an issue for small fixes.** What counts as a "small" or trivial fix is a
judgment call, so here's a few examples to clarify:
- fixing a typo
- refactoring a bit of code
- most documentation or comment edits

Still, _sometimes_ we may review your PR and ask you to file an issue if we expect there are larger
design decisions to be made.

## Making a PR

After you've filed an issue, you can make your PR referencing that issue number. Once you open your
PR, it will be labelled _Needs Review_. A maintainer will review your PR as soon as they can. The
reviewer may ask for changes - they will mark the PR as _Changes Requested_ and will give you
details about the requested changes. Feel free to ask lots of questions! The maintainers are there
to help you.

Once we (the maintainers) decide to accept your change, we will label your PR as _Accepted_.
Later (usually within a week or two), we will rebase your commits onto the `main` branch in a
separate PR, batched alongside other _Accepted_ commits and any internal changes. (This process
allows us to sync the state of our internal repository with the public repository.) Once your
change lands in `main`, we will close your PR.

### Caveats

Currently, internal contributions will take priority. Today Pingora is being maintained by
Cloudflare's Content Delivery team, and internal Cloudflare proxy services are a primary user of
Pingora. We value the community's work on Pingora, but the reality is that our team has a limited
amount of resources and time. We can't promise we will review or address all PRs or issues in a
timely manner.

## Conduct

Pingora and Cloudflare OpenSource generally follows the [Contributor Covenant Code of Conduct].
Violating the CoC could result in a warning or a ban to Pingora or any and all repositories in the Cloudflare organization.

[Contributor Covenant Code of Conduct]: https://github.com/cloudflare/.github/blob/26b37ca2ba7ab3d91050ead9f2c0e30674d3b91e/CODE_OF_CONDUCT.md

## Contact

If you have any questions, please reach out to [opensource@cloudflare.com](mailto:opensource@cloudflare.com).


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug Report
about: Report an issue to help us improve
title: ''
labels: ''
assignees: ''
---

## Describe the bug

A clear and concise description of what the bug is.

## Pingora info

Please include the following information about your environment:

**Pingora version**: release number of commit hash
**Rust version**: i.e. `cargo --version`
**Operating system version**: e.g. Ubuntu 22.04, Debian 12.4

## Steps to reproduce

Please provide step-by-step instructions to reproduce the issue. Include any relevant code
snippets.

## Expected results

What were you expecting to happen?

## Observed results

What actually happened?

## Additional context

What other information would you like to provide? e.g. screenshots, how you're working around the
issue, or other clues you think could be helpful to identify the root cause.


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Propose a new feature
title: ''
labels: ''
assignees: ''
---

## What is the problem your feature solves, or the need it fulfills?

A clear and concise description of why this feature should be added. What is the problem? Who is
this for?

## Describe the solution you'd like

What do you propose to resolve the problem or fulfill the need above? How would you like it to
work?

## Describe alternatives you've considered

What other solutions, features, or workarounds have you considered that might also solve the issue?
What are the tradeoffs for these alternatives compared to what you're proposing?

## Additional context

This could include references to documentation or papers, prior art, screenshots, or benchmark
results.


================================================
FILE: .github/workflows/audit.yml
================================================
name: Security Audit

on:
  push:
    branches:
      - master
    paths:
      - "**/Cargo.toml"
  schedule:
    - cron: "0 2 * * *" # run at 2 AM UTC

permissions:
  contents: read

jobs:
  security-audit:
    permissions:
      checks: write # for rustsec/audit-check to create check
      contents: read # for actions/checkout to fetch code
      issues: write # for rustsec/audit-check to create issues
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Generate Cargo.lock
        # https://github.com/rustsec/audit-check/issues/27
        run: cargo generate-lockfile --ignore-rust-version

      - name: Audit Check
        # https://github.com/rustsec/audit-check/issues/2
        uses: rustsec/audit-check@master
        with:
          token: ${{ secrets.GITHUB_TOKEN }}


================================================
FILE: .github/workflows/build.yml
================================================
on: [push, pull_request]

name: build

jobs:
  pingora:
    strategy:
      fail-fast: false
      matrix:
        # nightly, msrv, and latest stable
        toolchain: [nightly, 1.84.0, 1.91.1]
    runs-on: ubuntu-latest
    # Only run on "pull_request" event for external PRs. This is to avoid
    # duplicate builds for PRs created from internal branches.
    if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
    steps:
      - name: Checkout sources
        uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Install build dependencies
        run: |
          sudo apt update
          sudo apt install -y cmake libclang-dev wget gnupg ca-certificates lsb-release --no-install-recommends
          # openresty is used for convenience in tests as a server.
          wget -O - https://openresty.org/package/pubkey.gpg | sudo gpg --dearmor -o /usr/share/keyrings/openresty.gpg
          echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/openresty.gpg] http://openresty.org/package/ubuntu $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/openresty.list > /dev/null
          sudo apt update
          sudo apt install -y openresty --no-install-recommends

      - name: Install toolchain
        uses: dtolnay/rust-toolchain@master
        with:
          toolchain: ${{ matrix.toolchain }}
          components: rustfmt, clippy

      - name: Run cargo fmt
        run: cargo fmt --all -- --check

      - name: Run cargo test
        run: cargo test --verbose --lib --bins --tests --no-fail-fast

      # Need to run doc tests separately.
      # (https://github.com/rust-lang/cargo/issues/6669)
      - name: Run cargo doc test
        run: cargo test --verbose --doc

      - name: Run cargo clippy
        run: |
          [[ ${{ matrix.toolchain }} != 1.91.1 ]] || cargo clippy --all-targets --all -- --allow=unknown-lints --deny=warnings

      - name: Run cargo audit
        run: |
          [[ ${{ matrix.toolchain }} != 1.91.1 ]] || (cargo install --locked cargo-audit && cargo generate-lockfile --ignore-rust-version && cargo audit)

      - name: Run cargo machete
        run: |
          [[ ${{ matrix.toolchain }} != 1.91.1 ]] || (cargo install cargo-machete --version 0.7.0 && cargo machete)


================================================
FILE: .github/workflows/docs.yml
================================================
on:
  push:
    branches:
      - master

name: Docs

jobs:
  docs:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout sources
        uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Install build dependencies
        run: |
          sudo apt update
          sudo apt install -y cmake libclang-dev

      - name: Install stable toolchain
        uses: dtolnay/rust-toolchain@stable

      - name: Run cargo doc
        run: cargo doc --no-deps --all-features


================================================
FILE: .github/workflows/mark-stale.yaml
================================================
name: 'Close stale questions'
on:
  schedule:
    - cron: '30 1 * * *'
  workflow_dispatch:

permissions:
  issues: write
  pull-requests: write

jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/stale@v9
        with:
          stale-issue-message: 'This question has been stale for a week. It will be closed in an additional day if not updated.'
          close-issue-message: 'This issue has been closed because it has been stalled with no activity.'
          days-before-stale: -1
          days-before-issue-stale: 7
          days-before-issue-close: 1
          stale-issue-label: 'stale'
          only-issue-labels: 'question'


================================================
FILE: .github/workflows/semgrep.yml
================================================
on:
  pull_request: {}
  workflow_dispatch: {}
  push: 
    branches:
      - main
      - master
  schedule:
    - cron: '0 0 * * *'
name: Semgrep config
jobs:
  semgrep:
    name: semgrep/ci
    runs-on: ubuntu-latest
    env:
      SEMGREP_APP_TOKEN: ${{ secrets.SEMGREP_APP_TOKEN }}
      SEMGREP_URL: https://cloudflare.semgrep.dev
      SEMGREP_APP_URL: https://cloudflare.semgrep.dev
      SEMGREP_VERSION_CHECK_URL: https://cloudflare.semgrep.dev/api/check-version
    container:
      image: returntocorp/semgrep
    steps:
      - uses: actions/checkout@v4
      - run: semgrep ci


================================================
FILE: .gitignore
================================================
Cargo.lock
/target
**/*.rs.bk
dhat-heap.json
.vscode
.idea
.cover
bleeper.user.toml

================================================
FILE: .rustfmt.toml
================================================
edition = "2021"


================================================
FILE: CHANGELOG.md
================================================
# Changelog

All notable changes to this project will be documented in this file.

## [0.8.0](https://github.com/cloudflare/pingora/compare/0.7.0...0.8.0) - 2026-03-02


**🚀 Features**

* Add support for client certificate verification in mTLS configuration.
* Add upstream\_write\_pending\_time to Session for upload diagnostics.
* Pipe subrequests utility: creates a state machine to treat subrequests as a "pipe," enabling direct sending of request body and writing of response tasks, with a handler for error propagation and support for reusing a preset or captured input body for chained subrequests.
* Add the ability to limit the number of times a downstream connection can be reused
* Add a system for specifying and using service-level dependencies
* Add a builder for pingora proxy service, e.g. to specify ServerOptions.

**🐛 Bug Fixes**

* Fix various Windows compiler issues.
* Handle custom ALPNs in s2n impl of ALPN::to\_wire\_protocols() to fix s2n compile issues.
* Fix: don't use “all” permissions for socket.
* Fix a bug with the ketama load balancing where configurations were not persisted after updates.
* Ensure http1 downstream session is not reused on more body bytes than expected.
* Send RST\_STREAM CANCEL on application read timeouts for h2 client.
* Start close-delimited body mode after 101 is received for WebSocket upgrades. `UpgradedBody` is now an explicit HttpTask.
* Avoid close delimit mode on http/1.0 req.
* Reject invalid content-length http/1 requests to eliminate ambiguous request framing.
* Validate invalid content-length on http/1 resp by default, and removes content-length from the response if transfer-encoding is present, per RFC.
* Correct the custom protocol code for shutdown: changed the numeric code passed on shutdown to 0 to indicate an explicit shutdown rather than a transport error.

**⚙️ Miscellaneous Tasks**

* Remove `CacheKey::default` impl, users of caching should implement `cache_key_callback` themselves
* Allow server bootstrapping to take place in the context of services with dependents and dependencies
* Don't consider "bytes=" a valid range header: added an early check for an empty/whitespace-only range-set after the `bytes=` prefix, returning 416 Range Not Satisfiable, consistent with RFC 9110 14.1.2.
* Strip {content, transfer}-encoding from 416s to mirror the behavior for 304 Not Modified responses.
* Disable CONNECT method proxying by default, with an option to enable via server options; unsupported requests will now be automatically rejected.

## [0.7.0](https://github.com/cloudflare/pingora/compare/0.6.0...0.7.0) - 2026-01-30

### Highlights

- Extensible SslDigest to save user-defined TLS context
- Add ConnectionFilter trait for early TCP connection filtering

### 🚀 Features

- Add ConnectionFilter trait for early TCP connection filtering
- Introduce a virtual L4 stream abstraction
- Add support for verify_cert and verify_hostname using rustls
- Exposes the HttpProxy struct to allow external crates to customize the proxy logic.
- Exposes a new_mtls method for creating a HttpProxy with a client_cert_key to enable mtls peers.
- Add SSLKEYLOGFILE support to rustls connector
- Allow spawning background subrequests from main session
- Allow Extensions in cache LockCore and user tracing
- Add body-bytes tracking across H1/H2 and proxy metrics
- Allow setting max_weight on MissFinishType::Appended
- Allow adding SslDigestExtensions on downstream and upstream
- Add Custom session support for encapsulated HTTP

### 🐛 Bug Fixes

- Use write timeout consistently for h2 body writes
- Prevent downstream error prior to header from canceling cache fill
- Fix debug log and new tests
- Fix size calculation for buffer capacity
- Fix cache admission on header only misses
- Fix duplicate zero-size chunk on cache hit
- Fix chunked trailer end parsing
- Lock age timeouts cause lock reacquisition
- Fix transfer fd compile error for non linux os

### Sec

- Removed atty
- Upgrade lru to >= 0.16.3 crate version because of RUSTSEC-2026-0002

### Everything Else

- Add tracing to log reason for not caching an asset on cache put
- Evict when asset count exceeds optional watermark
- Remove trailing comma from Display for HttpPeer
- Make ProxyHTTP::upstream_response_body_filter return an optional duration for rate limiting
- Restore daemonize STDOUT/STDERR when error log file is not specified
- Log task info when upstream header failed to send
- Check cache enablement to determine cache fill
- Update meta when revalidating before lock release
- Add ForceFresh status to cache hit filter
- Pass stale status to cache lock
- Bump max multipart ranges to 200
- Downgrade Expires header warn to debug log
- CI and effective msrv bump to 1.83
- Add default noop custom param to client Session
- Use static str in ErrorSource or ErrorType as_str
- Use bstr for formatting byte strings
- Tweak the implementation of and documentation of `connection_filter` feature
- Set h1.1 when proxying cacheable responses
- Add or remove accept-ranges on range header filter
- Update msrv in github ci, fixup .bleep
- Override request keepalive on process shutdown
- Add shutdown flag to proxy session
- Add ResponseHeader in pingora_http crate's prelude
- Add a configurable upgrade for pingora-ketama that reduces runtime cpu and memory
- Add to cache api spans
- Increase visibility of multirange items
- Use seek_multipart on body readers
- Log read error when reading trailers end
- Re-add the warning about cache-api volatility
- Default to close on downstream response before body finish
- Ensure idle_timeout is polled even if idle_timeout is unset so notify events are registered for h2 idle pool, filter out closed connections when retrieving from h2 in use pool.
- Add simple read test for invalid extra char in header end
- Allow customizing lock status on Custom NoCacheReasons
- Close h1 conn by default if req header unfinished
- Add configurable retries for upgrade sock connect/accept
- Deflake test by increasing write size
- Make the version restrictions on rmp and rmp-serde more strict to prevent forcing consumers to use 2024 edition
- Rewind preread bytes when parsing next H1 response
- Add epoch and epoch_override to CacheMeta

## [0.6.0](https://github.com/cloudflare/pingora/compare/0.5.0...0.6.0) - 2025-08-15

### Highlights
- This release bumps the minimum h2 crate dependency to guard against the [MadeYouReset]((https://blog.cloudflare.com/madeyoureset-an-http-2-vulnerability-thwarted-by-rapid-reset-mitigations/)) H2 attack


### 🚀 Features

- Log runtime names during Server shutdown
- Enabling tracking the execution phase of a server
- Allow using in-memory compression dicts
- Make H2Options configurable at HttpServer, HttpProxy
  Also adds HttpServerOptions to the HttpServer implementation, and
  updates the HttpEchoApp to use HttpServer for easier adhoc testing.

### 🐛 Bug Fixes

- Fix: read body without discard

### Everything Else

- Try loading each LRU shard individually and warn on errors
- Update LRU save to disk to be atomic
- Allow cache to spawn_async_purge
- Pass hit handler in hit filter
- Cache hit filter can mutate cache, allow resetting cache lock
- Persist keepalive_timeout between requests on same stream
- Properly check for H2 io ReadError retry types
- Add cache lock wait timeout for readers
- Fix CacheLock status timeout conditions
- Handle close on partial chunk head
- Allow optional to reset session timeouts
- Clippy fixes for 1.87, add 1.87 to GitHub CI
- Run `range_{header,body}_filter` after disabling cache
- Convert `InterpretCacheControl` members to `Duration`
- Disable downstream ranging on max file size
- Allow explicit infinite keepalive timeout to be respected
  Note that a necessary follow up is to refactor the infinite keepalive
  timeout to only apply to first read between requests on reused conns.
- Add method to disable keepalive if downstream is unfinished
- Discard extra upstream body and disable keepalive
- Explicitly disable keepalive on upstream connection when excess body
  (content-length) is detected.
- Add brief sleep to shutdown signal tests to avoid flake
- Allow override of cache lock timeouts
- Allow arbitrary bytes in CacheKey instead of just Strings
- Corrects out-of-order data return after multiple peek calls with different buffer sizes.
- Mark previously too large chunked assets as cacheable
- Boring/OpenSSL load cert chain from connector options
- Add initial support for multipart range requests
- Adds a callback to HttpHealthCheck for collecting detailed backend summary information
- Multipart range filter state fixes


### Docs

- Explanation of request_body_filter phase


## [0.5.0](https://github.com/cloudflare/pingora/compare/0.4.0...0.5.0) - 2025-05-09

### 🚀 Features

- [Add tweak_new_upstream_tcp_connection hook to invoke logic on new upstream TCP sockets prior to connection](https://github.com/cloudflare/pingora/commit/be4a023d18c2b061f64ad5efd0868f9498199c91)
- [Add ability to configure max retries for upstream proxy failures](https://github.com/cloudflare/pingora/commit/6c5d6021a6e67c971e835bef269655d0db94c2d1)
- [Allow tcp user timeout to be configurable](https://github.com/cloudflare/pingora/commit/e77ca63da58892281f36dcb97c51a8b1e882e2f6)
- [Add peer address to downstream handshake error logs](https://github.com/cloudflare/pingora/commit/3f9e0a2fae8feaea12a1a9687e6b4bf4616f66c5)
- [Allow proxy to set stream level downstream read timeout](https://github.com/cloudflare/pingora/commit/87ae8ce2e7883c0a924a776b193c8a4f858b9349)
- [Improve support for sending custom response headers and bodies for error messages](https://github.com/cloudflare/pingora/commit/a8a6e77eef2c0f4d2a45f00c5b0e316dd373f2f2)
- [Allow configuring multiple listener tasks per endpoint](https://github.com/cloudflare/pingora/commit/69254671148938f6bc467f6decc2fc89ee7f531e)
- [Add get_stale and get_stale_while_update for memory-cache](https://github.com/cloudflare/pingora/commit/bb28044cbe9ac9251940b8a313d970c7d15aaff6)

### 🐛 Bug Fixes

- [Fix deadloop if proxy_handle_upstream exits earlier than proxy_handle_downstream](https://github.com/cloudflare/pingora/commit/bb111aaa92b3753e650957df3a68f56b0cffc65d)
- [Check on h2 stream end if error occurred for forwarding HTTP tasks](https://github.com/cloudflare/pingora/commit/e18f41bb6ddb1d6354e824df3b91d77f3255bea2)
- [Check for content-length underflow on end of stream h2 header](https://github.com/cloudflare/pingora/commit/575d1aafd7c679a50a443701a4c55dcfdbc443b2)
- [Correctly send empty h2 data frames prior to capacity polling](https://github.com/cloudflare/pingora/commit/c54190432a2efea30c5a0187bb7d078d33570a43)
- [Signal that the response is done when body write finishes to avoid h1 downstream/h2 upstream errors](https://github.com/cloudflare/pingora/commit/5750e4279e75b1e764dcfc5530aa7a7cebe3abef)
- [Ignore h2 pipe error when finishing an H2 upstream](https://github.com/cloudflare/pingora/commit/8ad15031291eb5779e0e93e714eb969c4132f632)
- [Add finish_request_body() for HTTP healthchecks so that H2 healthchecks succeed](https://github.com/cloudflare/pingora/commit/67bc7cc170e754d335cc1d6d526f203c4345eceb)
- [Fix Windows compile errors by updating `impl<T> UniqueID` to use correct return type](https://github.com/cloudflare/pingora/commit/1756948df77d257bddf7ab798cc3fddf348a91c8)
- [Fixed compilation errors on Windows](https://github.com/cloudflare/pingora/commit/906cb90864bf6e441727083c9cbd4f6fb289d6f5)
- [Poll for H2 capacity before sending H2 body to propagate backpressure](https://github.com/cloudflare/pingora/commit/b6f24ff3725d9d8b6a740d87cad959d94befbe54)
- [Fix for write_error_response for http2 downstreams to set EOS](https://github.com/cloudflare/pingora/commit/c0fa5065812d87e6e404c5624b26cd99c5194079)
- [Always drain v1 request body before session reuse](https://github.com/cloudflare/pingora/commit/fda3317ec822678564d641e7cf1c9b77ee3759ff)
- [Fixes HTTP1 client reads to properly timeout on initial read](https://github.com/cloudflare/pingora/commit/3c7db34acb0d930ae7043290a88bc56c1cd77e45)
- [Fixes issue where if TLS client never sends any bytes, hangs forever](https://github.com/cloudflare/pingora/commit/d1bf0bcac98f943fd716278d674e7d10dce2223e)

### Everything Else

- [Add builder api for pingora listeners](https://github.com/cloudflare/pingora/commit/3f564af3ae56e898478e13e71d67d095d7f5dbbd)
- [Better handling for h1 requests that contain both transfer-encoding and content-length](https://github.com/cloudflare/pingora/commit/9287b82645be4a52b0b63530ba38aa0c7ddc4b77)
- [Allow setting raw path in request to support non-UTF8 use cases](https://github.com/cloudflare/pingora/commit/e6b823c5d89860bb97713fdf14f197f799aed6af)
- [Allow reusing session on errors prior to proxy upstream](https://github.com/cloudflare/pingora/commit/f8d01278a586c60392b1e3b92e5ed97a415d8fe7)
- [Avoid allocating large buffer in the accept() loop](https://github.com/cloudflare/pingora/commit/ef234f5baa45650be064c7dd34c2f17986361480)
- [Ensure HTTP/1.1 when forcing chunked encoding](https://github.com/cloudflare/pingora/commit/9281cab8eab1b545f15f0e387d2ba4cd2ca27364)
- [Reject if the HTTP header contains duplicated Content-Length values](https://github.com/cloudflare/pingora/commit/eef35768d11305d1293468a6c3ce91a3858dc0fc)
- [proxy_upstream_filter tries to reuse downstream by default](https://github.com/cloudflare/pingora/commit/86293e65b5c7d8a96f3a333a1f191766dc95bee5)
- [Allow building server that avoids std::process::exit during shutdown](https://github.com/cloudflare/pingora/commit/2d977d4eb808d8bcbc0ce87cabac4cf4854dfb80)
- [Update Sentry crate to 0.36](https://github.com/cloudflare/pingora/commit/01a1f9a65c51a4351c29d6961ea3164a6a811958)
- [Update the bounds on `MemoryCache` methods to accept broader key types](https://github.com/cloudflare/pingora/commit/d66923a9a41d00b326cef5dfb57d8c020d6a4abb)
- [Flush already received data if upstream write errors](https://github.com/cloudflare/pingora/commit/aa7c2f1a89a652137a987e5f5dbdab228c2f4d06)
- [Allow modules to receive HttpTask::Done, flush response compression on receiving Done task](https://github.com/cloudflare/pingora/commit/c82fb6ba57b95c256b58095881a33a9bc08f170a)
- API signature changes as part of experimental proxy cache support
- Note MSRV was effectively bumped to 1.82 from 1.72 due to a dependency update, though older compilers may still be able to build by pinning dependencies, e.g. `cargo update -p backtrace --precise 0.3.74`.

## [0.4.0](https://github.com/cloudflare/pingora/compare/0.3.0...0.4.0) - 2024-11-01

### 🚀 Features
- [Add preliminary rustls support](https://github.com/cloudflare/pingora/commit/354a6ee1e99b82e23fc0f27a37d8bf41e62b2dc5)
- [Add experimental support for windows](https://github.com/cloudflare/pingora/commit/4aadba12727afe6178f3b9fc2a3cad2223ac7b2e)
- [Add the option to use no TLS implementation](https://github.com/cloudflare/pingora/commit/d8f3ffae77ddc1edd285ab1d517a1b6748ce3d58)
- [Add support for gRPC-web module to bridge gRPC-web client requests to gRPC server requests](https://github.com/cloudflare/pingora/commit/9917177c646a0ab58197f15ec57a3bcbe1e0a201)
- [Add the support for h2c and http1 to coexist](https://github.com/cloudflare/pingora/commit/792d5fd3c14c1cd588b155ddf09c09a4c125a26b)
- [Add the support for custom L4 connector](https://github.com/cloudflare/pingora/commit/7c122e7f36de5c946ac960a1691c5dd41f26e6e6)
- [Support opaque extension field in Backend](https://github.com/cloudflare/pingora/commit/999e379064d2c1266a267abdf9f4f41b14bffcf5)
- [Add the ability to ignore informational responses when proxying downstream](https://github.com/cloudflare/pingora/commit/be97e35031cf4f5a01191f1848bdf491bd9f0d62)
- [Add un-gzip support and allow decompress by algorithm](https://github.com/cloudflare/pingora/commit/e1c6e57db3e613991eda3160d15f81e0669ea066)
- [Add the ability to observe backend health status](https://github.com/cloudflare/pingora/commit/8a0c73f174a27a87c54426a748c4818b10de9425)
- [Add the support for passing sentry release](https://github.com/cloudflare/pingora/commit/07a970e413009ee62fc4c15e0820ae1aa036af22)
- [Add the support for binding to local port ranges](https://github.com/cloudflare/pingora/commit/d1d7a87b761eeb4f71fcaa3f7c4ae8e32f1d93c8)
- [Support retrieving rx timestamp for TcpStream](https://github.com/cloudflare/pingora/commit/d811795938cee5a6eb7cd46399cef17210a0d0c5)

### 🐛 Bug Fixes
- [Handle bare IPv6 address in raw connect Host](https://github.com/cloudflare/pingora/commit/9f50e6ccb09db2940eec6fc170a1e9e9b14a95d0)
- [Set proper response headers when compression is enabled](https://github.com/cloudflare/pingora/commit/55049c4e7983055551b34feee397c736ffc912bb)
- [Check the current advertised h2 max streams](https://github.com/cloudflare/pingora/commit/7419b1967e7686b00aefb7bcd2a4dfe59b31e639)
- Other bug fixes and improvements


### ⚙️ Changes and Miscellaneous Tasks
- [Make sentry an optional feature](https://github.com/cloudflare/pingora/commit/ab1b717bf587723c1c537d6549a8f8096f0900d4)
- [Make timeouts Sync](https://github.com/cloudflare/pingora/commit/18db42cd2cb892432fd7896f0da7e9d19221214b)
- [Retry all h2 connection when encountering graceful shutdown](https://github.com/cloudflare/pingora/commit/11b5882a422774cffbd14d9a9ea7dfc9dc98b02c)
- [Make l4 module pub to expose Connect](https://github.com/cloudflare/pingora/commit/91702bb0c0c5e1f2d5e2f40a19a3f340bb5a6d82)
- [Auto snake case set-cookie header when downgrade to from h2 to http1.1](https://github.com/cloudflare/pingora/commit/2c6190c634f2a5dd2f00e8597902f2b735a9d84f)
- [shutdown h2 connection gracefully with GOAWAYs](https://github.com/cloudflare/pingora/commit/04d7cfeef6205d2cf33ad5704a363ee107250771)
- Other API signature updates

## [0.3.0](https://github.com/cloudflare/pingora/compare/0.2.0...0.3.0) - 2024-07-12

### 🚀 Features
- Add support for HTTP modules. This feature allows users to import modules written by 3rd parties.
- Add `request_body_filter`. Now request body can be inspected and modified.
- Add H2c support.
- Add TCP fast open support.
- Add support for server side TCP keep-alive.
- Add support to get TCP_INFO.
- Add support to set DSCP.
- Add `or_err()`/`or_err_with` API to convert `Options` to `pingora::Error`.
- Add `or_fail()` API to convert `impl std::error::Error` to `pingora::Error`.
- Add the API to track socket read and write pending time.
- Compression: allow setting level per algorithm.

### 🐛 Bug Fixes
- Fixed a panic when using multiple H2 streams in the same H2 connection to upstreams.
- Pingora now respects the `Connection` header it sends to upstream.
- Accept-Ranges header is now removed when response is compressed.
- Fix ipv6_only socket flag.
- A new H2 connection is opened now if the existing connection returns GOAWAY with graceful shutdown error.
- Fix a FD mismatch error when 0.0.0.0 is used as the upstream IP

### ⚙️ Changes and Miscellaneous Tasks
- Dependency: replace `structopt` with `clap`
- Rework the API of HTTP modules
- Optimize remove_header() API call
- UDS parsing now requires the path to have `unix:` prefix. The support for the path without prefix is deprecated and will be removed on the next release.
- Other minor API changes

## [0.2.0](https://github.com/cloudflare/pingora/compare/0.1.1...0.2.0) - 2024-05-10

### 🚀 Features
- Add support for downstream h2 trailers and add an upstream h2 response trailer filter
- Add the ability to set TCP recv buf size
- Add a convenience function to retrieve Session digest
- Add `body_bytes_read()` method to Session
- Add `cache_not_modified_filter`
- Add `SSLKEYLOG` support for tls upstream
- Add `Service<HttpProxy<T>>` constructor for providing name
- Add `purge_response` callback
- Make `pop_closed` pub, to simplify DIY drains

### 🐛 Bug Fixes
- Fixed gRPC trailer proxying
- Fixed `response_body_filter` `end_of_stream` always being false
- Fixed compile error in Rust <= 1.73
- Fixed non linux build
- Fixed the counting problem of used_weight data field in `LruUnit<T>`
- Fixed `cargo run --example server` missing cert
- Fixed error log string interpolation outside of proper context
- Fixed tinylfu test flake

### ⚙️ Changes and Miscellaneous Tasks
- API change: `Server::run_forever` now takes ownership and ensures exit semantics
- API change: `cleanup()` method of `ServerApp` trait is now async
- Behavior change: Always return `HttpTask::Body` on body done instead of `HttpTask::done`
- Behavior change: HTTP/1 reason phrase is now parsed and proxied
- Updated `h2` dependency for RUSTSEC-2024-0332
- Updated zstd dependencies
- Code optimization and refactor in a few crates
- More examples and docs

## [0.1.1](https://github.com/cloudflare/pingora/compare/0.1.0...0.1.1) - 2024-04-05

### 🚀 Features
- `Server::new` now accepts `Into<Option<T>>`
- Implemented client `HttpSession::get_keepalive_values` for Keep-Alive parsing
- Expose `ListenFds` and `Fds` to fix a voldemort types issue
- Expose config options in `ServerConf`, provide new `Server` constructor
- `upstream_response_filter` now runs on upstream 304 responses during cache revalidation
- Added `server_addr` and `client_addr` APIs to `Session`
- Allow body modification in `response_body_filter`
- Allow configuring grace period and graceful shutdown timeout
- Added TinyUFO sharded skip list storage option

### 🐛 Bug Fixes
- Fixed build failures with the `boringssl` feature
- Fixed compile warnings with nightly Rust
- Fixed an issue where Upgrade request bodies might not be handled correctly
- Fix compilation to only include openssl or boringssl rather than both
- Fix OS read errors so they are reported as `ReadError` rather than `ReadTimeout` when reading http/1.1 response headers

### ⚙️ Miscellaneous Tasks
- Performance improvements in `pingora-ketama`
- Added more TinyUFO benchmarks
- Added tests for `pingora-cache` purge
- Limit buffer size for `InvalidHTTPHeader` error logs
- Example code: improvements in pingora client, new LB cluster example
- Typo fixes and clarifications across comments and docs

## [0.1.0] - 2024-02-28
### Highlights
- First Public Release of Pingora 🎉


================================================
FILE: Cargo.toml
================================================


[workspace]
resolver = "2"
members = [
    "pingora",
    "pingora-core",
    "pingora-pool",
    "pingora-error",
    "pingora-limits",
    "pingora-timeout",
    "pingora-header-serde",
    "pingora-proxy",
    "pingora-cache",
    "pingora-http",
    "pingora-lru",
    "pingora-openssl",
    "pingora-boringssl",
    "pingora-runtime",
    "pingora-rustls",
    "pingora-s2n",
    "pingora-ketama",
    "pingora-load-balancing",
    "pingora-memory-cache",
    "tinyufo",
]

[workspace.dependencies]
bstr = "1.12.0"
tokio = "1"
tokio-stream = { version = "0.1" }
async-trait = "0.1.42"
httparse = "1"
bytes = "1.0"
derivative = "2.2.0"
http = "1"
log = "0.4"
h2 = ">=0.4.11"
once_cell = "1"
lru = "0.16.3"
ahash = ">=0.8.9"

[profile.bench]
debug = true


================================================
FILE: Dockerfile
================================================
FROM debian:latest as builder

ARG BUILDARCH
RUN apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
       gcc g++ libfindbin-libs-perl \
       make cmake libclang-dev git \
       wget curl gnupg ca-certificates lsb-release \
    && wget --no-check-certificate -O - https://openresty.org/package/pubkey.gpg | gpg --dearmor -o /usr/share/keyrings/openresty.gpg \
    && if [ "${BUILDARCH}" = "arm64" ]; then URL="http://openresty.org/package/arm64/debian"; else URL="http://openresty.org/package/debian"; fi \
    && echo "deb [arch=$BUILDARCH signed-by=/usr/share/keyrings/openresty.gpg] ${URL} $(lsb_release -sc) openresty" | tee /etc/apt/sources.list.d/openresty.list > /dev/null \
    && apt-get -qq update \
    && apt-get -qq install -y openresty --no-install-recommends

RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"

WORKDIR /var/opt/pingora
COPY . .
RUN cargo build


================================================
FILE: LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: README.md
================================================
# Pingora

![Pingora banner image](./docs/assets/pingora_banner.png)

## What is Pingora
Pingora is a Rust framework to [build fast, reliable and programmable networked systems](https://blog.cloudflare.com/pingora-open-source).

Pingora is battle tested as it has been serving more than 40 million Internet requests per second for [more than a few years](https://blog.cloudflare.com/how-we-built-pingora-the-proxy-that-connects-cloudflare-to-the-internet).

## Feature highlights
* Async Rust: fast and reliable
* HTTP 1/2 end to end proxy
* TLS over OpenSSL, BoringSSL, s2n-tls, or rustls(experimental).
* gRPC and websocket proxying
* Graceful reload
* Customizable load balancing and failover strategies
* Support for a variety of observability tools

## Reasons to use Pingora
* **Security** is your top priority: Pingora is a more memory safe alternative for services that are written in C/C++
* Your service is **performance-sensitive**: Pingora is fast and efficient
* Your service requires extensive **customization**: The APIs Pingora proxy framework provides are highly programmable

# Getting started

See our [quick starting guide](./docs/quick_start.md) to see how easy it is to build a load balancer.

Our [user guide](./docs/user_guide/index.md) covers more topics such as how to configure and run Pingora servers, as well as how to build custom HTTP servers and proxy logic on top of Pingora's framework.

API docs are also available for all the crates.

# Notable crates in this workspace
* Pingora: the "public facing" crate to build networked systems and proxies
* Pingora-core: this crate defines the protocols, functionalities and basic traits
* Pingora-proxy: the logic and APIs to build HTTP proxies
* Pingora-error: the common error type used across Pingora crates
* Pingora-http: the HTTP header definitions and APIs
* Pingora-openssl & pingora-boringssl: SSL related extensions and APIs
* Pingora-ketama: the [Ketama](https://github.com/RJ/ketama) consistent algorithm
* Pingora-limits: efficient counting algorithms
* Pingora-load-balancing: load balancing algorithm extensions for pingora-proxy
* Pingora-memory-cache: Async in-memory caching with cache lock to prevent cache stampede
* Pingora-s2n: SSL extensions and APIs related to s2n-tls
* Pingora-timeout: A more efficient async timer system
* TinyUfo: The caching algorithm behind pingora-memory-cache

Note that Pingora proxy integration with caching should be considered experimental, and as such APIs related to caching are currently highly volatile.

# System requirements

## Systems
Linux is our tier 1 environment and main focus.

We will try our best for most code to compile for Unix environments. This is for developers and users to have an easier time developing with Pingora in Unix-like environments like macOS (though some features might be missing)

Windows support is preliminary by community's best effort only.

Both x86_64 and aarch64 architectures will be supported.

## Rust version

Pingora keeps a rolling MSRV (minimum supported Rust version) policy of 6 months. This means we will accept PRs that upgrade the MSRV as long as the new Rust version used is at least 6 months old. However, we generally will not bump the highest MSRV across the workspace without a sufficiently compelling reason.

Our current MSRV is 1.84.

Currently not all crates enforce `rust-version` as it is possible to use some crates on lower versions.

## Build Requirements

Some of the crates in this repository have dependencies on additional tools and
libraries that must be satisfied in order to build them:

* Make sure that [Clang] is installed on your system (for boringssl)
* Make sure that [Perl 5] is installed on your system (for openssl)

[Clang]:https://clang.llvm.org/
[Perl 5]:https://www.perl.org/

# Contributing
Please see our [contribution guidelines](./.github/CONTRIBUTING.md).

# License
This project is Licensed under [Apache License, Version 2.0](./LICENSE).


================================================
FILE: cliff.toml
================================================
# git-cliff ~ default configuration file
# https://git-cliff.org/docs/configuration
#
# Lines starting with "#" are comments.
# Configuration options are organized into tables and keys.
# See documentation for more information on available options.

[changelog]
# changelog header
header = """
# Changelog\n
All notable changes to this project will be documented in this file.\n
"""
# template for the changelog body
# https://keats.github.io/tera/docs/#introduction
body = """
{% if version %}\
  {% if previous.version %}\
    ## [{{ version | trim_start_matches(pat="v") }}](https://github.com/cloudflare/pingora/compare/{{ previous.version }}...{{ version }}) - {{ timestamp | date(format="%Y-%m-%d") }}
  {% else %}\
    ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
  {% endif %}\
{% else %}\
    ## [unreleased]
{% endif %}\

### Highlights
  - Human-written change summaries go here

{% for group, commits in commits | group_by(attribute="group") %}
    ### {{ group | striptags | trim | upper_first }}
    {% for commit in commits %}
        - {% if commit.scope %}*({{ commit.scope }})* {% endif %}\
            {% if commit.breaking %}[**breaking**] {% endif %}\
            {{ commit.message | upper_first }}\
    {% endfor %}
{% endfor %}\n
"""
# template for the changelog footer
footer = """
"""
# remove the leading and trailing whitespace
trim = true

[git]
# parse the commits based on https://www.conventionalcommits.org
conventional_commits = true

# filter out the commits that are not conventional
filter_unconventional = false

# process each line of a commit as an individual commit
split_commits = false

# regex for preprocessing the commit messages
commit_preprocessors = [
  { pattern = '\n\w+(?:\-\w+)*:\s+[^\n]+', replace = "\n" },
  { pattern = '\n+', replace = "\n  " },
  { pattern = '\s+$', replace = "" }
]

# regex for parsing and grouping commits
commit_parsers = [
  { message = "^feat", group = "<!-- 0 -->🚀 Features" },
  { message = "^fix", group = "<!-- 1 -->🐛 Bug Fixes" },
  { message = "^doc", group = "<!-- 3 -->📚 Documentation", skip = true  },
  { message = "^perf", group = "<!-- 4 -->⚡ Performance" },
  { message = "^refactor", group = "<!-- 2 -->🚜 Refactor", skip = true  },
  { message = "^style", group = "<!-- 5 -->🎨 Styling", skip = true  },
  { message = "^test", group = "<!-- 6 -->🧪 Testing", skip = true  },
  { message = "^chore\\(release\\): prepare for", skip = true },
  { message = "^chore\\(deps.*\\)", skip = true },
  { message = "^chore\\(pr\\)", skip = true },
  { message = "^chore\\(pull\\)", skip = true },
  { message = "^chore|^ci", group = "<!-- 7 -->⚙️ Miscellaneous Tasks" },
  { body = ".*security", group = "<!-- 8 -->🛡️ Security" },
  { message = "^revert", group = "<!-- 9 -->◀️ Revert" },
  { message = '\S+(?:\s+\S+){6,}', group = "<!--10--> Everything Else" }
]

# protect breaking changes from being skipped due to matching a skipping commit_parser
protect_breaking_commits = false

# filter out the commits that are not matched by commit parsers
filter_commits = false
tag_pattern = "[0-9].[0-9].[0-9]"
topo_order = false

================================================
FILE: clippy.toml
================================================
msrv = "1.84"


================================================
FILE: docs/README.md
================================================
# Pingora User Manual

## Quick Start
In this section we show you how to build a bare-bones load balancer.

[Read the quick start here.](quick_start.md)

## User Guide
Covers how to configure and run Pingora servers, as well as how to build custom HTTP server and proxy logic on top of Pingora's framework.

[Read the user guide here.](user_guide/index.md)

## API Reference
TBD


================================================
FILE: docs/quick_start.md
================================================
# Quick Start: load balancer

## Introduction

This quick start shows how to build a bare-bones load balancer using pingora and pingora-proxy.

The goal of the load balancer is for every incoming HTTP request, select one of the two backends: https://1.1.1.1 and https://1.0.0.1 in a round-robin fashion.

## Build a basic load balancer

Create a new cargo project for our load balancer. Let's call it `load_balancer`

```
cargo new load_balancer
```

### Include the Pingora Crate and Basic Dependencies

In your project's `cargo.toml` file add the following to your dependencies
```
async-trait="0.1"
pingora = { version = "0.3", features = [ "lb" ] }
```

### Create a pingora server
First, let's create a pingora server. A pingora `Server` is a process which can host one or many
services. The pingora `Server` takes care of configuration and CLI argument parsing, daemonization,
signal handling, and graceful restart or shutdown.

The preferred usage is to initialize the `Server` in the `main()` function and
use `run_forever()` to spawn all the runtime threads and block the main thread until the server is
ready to exit.


```rust
use async_trait::async_trait;
use pingora::prelude::*;
use std::sync::Arc;

fn main() {
    let mut my_server = Server::new(None).unwrap();
    my_server.bootstrap();
    my_server.run_forever();
}
```

This will compile and run, but it doesn't do anything interesting.

### Create a load balancer proxy
Next let's create a load balancer. Our load balancer holds a static list of upstream IPs. The `pingora-load-balancing` crate already provides the `LoadBalancer` struct with common selection algorithms such as round robin and hashing. So let’s just use it. If the use case requires more sophisticated or customized server selection logic, users can simply implement it themselves in this function.


```rust
pub struct LB(Arc<LoadBalancer<RoundRobin>>);
```

In order to make the server a proxy, we need to implement the `ProxyHttp` trait for it.

Any object that implements the `ProxyHttp` trait essentially defines how a request is handled in
the proxy. The only required method in the `ProxyHttp` trait is `upstream_peer()` which returns
the address where the request should be proxied to.

In the body of the `upstream_peer()`, let's use the `select()` method for the `LoadBalancer` to round-robin across the upstream IPs. In this example we use HTTPS to connect to the backends, so we also need to specify to `use_tls` and set the SNI when constructing our [`Peer`](user_guide/peer.md)) object.

```rust
#[async_trait]
impl ProxyHttp for LB {

    /// For this small example, we don't need context storage
    type CTX = ();
    fn new_ctx(&self) -> () {
        ()
    }

    async fn upstream_peer(&self, _session: &mut Session, _ctx: &mut ()) -> Result<Box<HttpPeer>> {
        let upstream = self.0
            .select(b"", 256) // hash doesn't matter for round robin
            .unwrap();

        println!("upstream peer is: {upstream:?}");

        // Set SNI to one.one.one.one
        let peer = Box::new(HttpPeer::new(upstream, true, "one.one.one.one".to_string()));
        Ok(peer)
    }
}
```

In order for the 1.1.1.1 backends to accept our requests, a host header must be present. Adding this header
can be done by the `upstream_request_filter()` callback which modifies the request header after
the connection to the backends are established and before the request header is sent.

```rust
impl ProxyHttp for LB {
    // ...
    async fn upstream_request_filter(
        &self,
        _session: &mut Session,
        upstream_request: &mut RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        upstream_request.insert_header("Host", "one.one.one.one").unwrap();
        Ok(())
    }
}
```


### Create a pingora-proxy service
Next, let's create a proxy service that follows the instructions of the load balancer above.

A pingora `Service` listens to one or multiple (TCP or Unix domain socket) endpoints. When a new connection is established
the `Service` hands the connection over to its "application." `pingora-proxy` is such an application
which proxies the HTTP request to the given backend as configured above.

In the example below, we create a `LB` instance with two backends `1.1.1.1:443` and `1.0.0.1:443`.
We put that `LB` instance to a proxy `Service` via the  `http_proxy_service()` call and then tell our
`Server` to host that proxy `Service`.

```rust
fn main() {
    let mut my_server = Server::new(None).unwrap();
    my_server.bootstrap();

    let upstreams =
        LoadBalancer::try_from_iter(["1.1.1.1:443", "1.0.0.1:443"]).unwrap();

    let mut lb = http_proxy_service(&my_server.configuration, LB(Arc::new(upstreams)));
        lb.add_tcp("0.0.0.0:6188");

    my_server.add_service(lb);

    my_server.run_forever();
}
```

### Run it

Now that we have added the load balancer to the service, we can run our new 
project with 

```cargo run```

To test it, simply send the server a few requests with the command:
```
curl 127.0.0.1:6188 -svo /dev/null
```

You can also navigate your browser to [http://localhost:6188](http://localhost:6188)

The following output shows that the load balancer is doing its job to balance across the two backends:
```
upstream peer is: Backend { addr: Inet(1.0.0.1:443), weight: 1 }
upstream peer is: Backend { addr: Inet(1.1.1.1:443), weight: 1 }
upstream peer is: Backend { addr: Inet(1.0.0.1:443), weight: 1 }
upstream peer is: Backend { addr: Inet(1.1.1.1:443), weight: 1 }
upstream peer is: Backend { addr: Inet(1.0.0.1:443), weight: 1 }
...
```

Well done! At this point you have a functional load balancer. It is a _very_ 
basic load balancer though, so the next section will walk you through how to
make it more robust with some built-in pingora tooling.

## Add functionality

Pingora provides several helpful features that can be enabled and configured 
with just a few lines of code. These range from simple peer health checks to 
the ability to seamlessly update running binary with zero service interruptions.

### Peer health checks

To make our load balancer more reliable, we would like to add some health checks 
to our upstream peers. That way if there is a peer that has gone down, we can 
quickly stop routing our traffic to that peer.

First let's see how our simple load balancer behaves when one of the peers is
down. To do this, we'll update the list of peers to include a peer that is 
guaranteed to be broken.

```rust
fn main() {
    // ...
    let upstreams =
        LoadBalancer::try_from_iter(["1.1.1.1:443", "1.0.0.1:443", "127.0.0.1:343"]).unwrap();
    // ...
}
```

Now if we run our load balancer again with `cargo run`, and test it with 

```
curl 127.0.0.1:6188 -svo /dev/null
```

We can see that one in every 3 request fails with `502: Bad Gateway`. This is 
because our peer selection is strictly following the `RoundRobin` selection 
pattern we gave it with no consideration to whether that peer is healthy. We can
fix this by adding a basic health check service. 

```rust
fn main() {
    let mut my_server = Server::new(None).unwrap();
    my_server.bootstrap();

    // Note that upstreams needs to be declared as `mut` now
    let mut upstreams =
        LoadBalancer::try_from_iter(["1.1.1.1:443", "1.0.0.1:443", "127.0.0.1:343"]).unwrap();

    let hc = TcpHealthCheck::new();
    upstreams.set_health_check(hc);
    upstreams.health_check_frequency = Some(std::time::Duration::from_secs(1));

    let background = background_service("health check", upstreams);
    let upstreams = background.task();

    // `upstreams` no longer need to be wrapped in an arc
    let mut lb = http_proxy_service(&my_server.configuration, LB(upstreams));
    lb.add_tcp("0.0.0.0:6188");

    my_server.add_service(background);

    my_server.add_service(lb);
    my_server.run_forever();
}
```

Now if we again run and test our load balancer, we see that all requests 
succeed and the broken peer is never used. Based on the configuration we used, 
if that peer were to become healthy again, it would be re-included in the round
robin again in within 1 second.

### Command line options

The pingora `Server` type provides a lot of built-in functionality that we can
take advantage of with single-line change. 

```rust
fn main() {
    let mut my_server = Server::new(Some(Opt::parse_args())).unwrap();
    ...
}
```

With this change, the command-line arguments passed to our load balancer will be 
consumed by Pingora. We can test this by running:

```
cargo run -- -h
```

We should see a help menu with the list of arguments now available to us. We 
will take advantage of those in the next sections to do more with our load 
balancer for free

### Running in the background

Passing the parameter `-d` or `--daemon` will tell the program to run in the background.

```
cargo run -- -d
```

To stop this service, you can send `SIGTERM` signal to it for a graceful shutdown, in which the service will stop accepting new request but try to finish all ongoing requests before exiting.
```
pkill -SIGTERM load_balancer
```
 (`SIGTERM` is the default signal for `pkill`.)

### Configurations
Pingora configuration files help define how to run the service. Here is an 
example config file that defines how many threads the service can have, the 
location of the pid file, the error log file, and the upgrade coordination 
socket (which we will explain later). Copy the contents below and put them into
a file called `conf.yaml` in your `load_balancer` project directory.

```yaml
---
version: 1
threads: 2
pid_file: /tmp/load_balancer.pid
error_log: /tmp/load_balancer_err.log
upgrade_sock: /tmp/load_balancer.sock
```

To use this conf file:
```
RUST_LOG=INFO cargo run -- -c conf.yaml -d
```
`RUST_LOG=INFO` is here so that the service actually populate the error log.

Now you can find the pid of the service.
```
 cat /tmp/load_balancer.pid
```

### Gracefully upgrade the service
(Linux only)

Let's say we changed the code of the load balancer and recompiled the binary. Now we want to upgrade the service running in the background to this newer version.

If we simply stop the old service, then start the new one, some request arriving in between could be lost. Fortunately, Pingora provides a graceful way to upgrade the service.

This is done by, first, send `SIGQUIT` signal to the running server, and then start the new server with the parameter `-u` \ `--upgrade`.

```
pkill -SIGQUIT load_balancer &&\
RUST_LOG=INFO cargo run -- -c conf.yaml -d -u
```

In this process, The old running server will wait and hand over its listening sockets to the new server. Then the old server runs until all its ongoing requests finish.

From a client's perspective, the service is always running because the listening socket is never closed.

## Full examples

The full code for this example is available in this repository under

[pingora-proxy/examples/load_balancer.rs](../pingora-proxy/examples/load_balancer.rs)

Other examples that you may find helpful are also available here

[pingora-proxy/examples/](../pingora-proxy/examples/)
[pingora/examples](../pingora/examples/)

================================================
FILE: docs/user_guide/conf.md
================================================
# Configuration

A Pingora configuration file is a list of Pingora settings in yaml format.

Example
```yaml
---
version: 1
threads: 2
pid_file: /run/pingora.pid
upgrade_sock: /tmp/pingora_upgrade.sock
user: nobody
group: webusers
```
## Settings
| Key      | meaning        | value type |
| ------------- |-------------| ----|
| version | the version of the conf, currently it is a constant `1` | number |
| pid_file | The path to the pid file | string |
| daemon | whether to run the server in the background | bool |
| error_log | the path to error log output file. STDERR is used if not set | string |
| upgrade_sock | the path to the upgrade socket. | string |
| threads | number of threads per service | number |
| user | the user the pingora server should be run under after daemonization | string |
| group | the group the pingora server should be run under after daemonization | string |
| client_bind_to_ipv4 | source IPv4 addresses to bind to when connecting to server | list of string |
| client_bind_to_ipv6 | source IPv6 addresses to bind to when connecting to server| list of string |
| ca_file | The path to the root CA file | string |
| s2n_config_cache_size | The maximum number of unique s2n configs to cache. A value of 0 disables the cache. Default: 10 (s2n-tls only) | number |
| work_stealing | Enable work stealing runtime (default true). See Pingora runtime (WIP) section for more info | bool |
| upstream_keepalive_pool_size | The number of total connections to keep in the connection pool | number |

## Extension
Any unknown settings will be ignored. This allows extending the conf file to add and pass user defined settings. See User defined configuration section.


================================================
FILE: docs/user_guide/ctx.md
================================================
# Sharing state across phases with `CTX`

## Using `CTX`
The custom filters users implement in different phases of the request don't interact with each other directly. In order to share information and state across the filters, users can define a `CTX` struct. Each request owns a single `CTX` object. All the filters are able to read and update members of the `CTX` object. The CTX object will be dropped at the end of the request.

### Example

In the following example, the proxy parses the request header in the `request_filter` phase, it stores the boolean flag so that later in the `upstream_peer` phase the flag is used to decide which server to route traffic to. (Technically, the header can be parsed in `upstream_peer` phase, but we just do it in an earlier phase just for the demonstration.)

```Rust
pub struct MyProxy();

pub struct MyCtx {
    beta_user: bool,
}

fn check_beta_user(req: &pingora_http::RequestHeader) -> bool {
    // some simple logic to check if user is beta
    req.headers.get("beta-flag").is_some()
}

#[async_trait]
impl ProxyHttp for MyProxy {
    type CTX = MyCtx;
    fn new_ctx(&self) -> Self::CTX {
        MyCtx { beta_user: false }
    }

    async fn request_filter(&self, session: &mut Session, ctx: &mut Self::CTX) -> Result<bool> {
        ctx.beta_user = check_beta_user(session.req_header());
        Ok(false)
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let addr = if ctx.beta_user {
            info!("I'm a beta user");
            ("1.0.0.1", 443)
        } else {
            ("1.1.1.1", 443)
        };

        let peer = Box::new(HttpPeer::new(addr, true, "one.one.one.one".to_string()));
        Ok(peer)
    }
}
```

## Sharing state across requests
Sharing state such as a counter, cache and other info across requests is common. There is nothing special needed for sharing resources and data across requests in Pingora. `Arc`, `static` or any other mechanism can be used.


### Example
Let's modify the example above to track the number of beta visitors as well as the number of total visitors. The counters can either be defined in the `MyProxy` struct itself or defined as a global variable. Because the counters can be concurrently accessed, Mutex is used here.

```Rust
// global counter
static REQ_COUNTER: Mutex<usize> = Mutex::new(0);

pub struct MyProxy {
    // counter for the service
    beta_counter: Mutex<usize>, // AtomicUsize works too
}

pub struct MyCtx {
    beta_user: bool,
}

fn check_beta_user(req: &pingora_http::RequestHeader) -> bool {
    // some simple logic to check if user is beta
    req.headers.get("beta-flag").is_some()
}

#[async_trait]
impl ProxyHttp for MyProxy {
    type CTX = MyCtx;
    fn new_ctx(&self) -> Self::CTX {
        MyCtx { beta_user: false }
    }

    async fn request_filter(&self, session: &mut Session, ctx: &mut Self::CTX) -> Result<bool> {
        ctx.beta_user = check_beta_user(session.req_header());
        Ok(false)
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let mut req_counter = REQ_COUNTER.lock().unwrap();
        *req_counter += 1;

        let addr = if ctx.beta_user {
            let mut beta_count = self.beta_counter.lock().unwrap();
            *beta_count += 1;
            info!("I'm a beta user #{beta_count}");
            ("1.0.0.1", 443)
        } else {
            info!("I'm an user #{req_counter}");
            ("1.1.1.1", 443)
        };

        let peer = Box::new(HttpPeer::new(addr, true, "one.one.one.one".to_string()));
        Ok(peer)
    }
}
```

The complete example can be found under [`pingora-proxy/examples/ctx.rs`](../../pingora-proxy/examples/ctx.rs). You can run it using `cargo`:
```
RUST_LOG=INFO cargo run --example ctx
```

================================================
FILE: docs/user_guide/daemon.md
================================================
# Daemonization

When a Pingora server is configured to run as a daemon, after its bootstrapping, it will move itself to the background and optionally change to run under the configured user and group. The `pid_file` option comes handy in this case for the user to track the PID of the daemon in the background.

Daemonization also allows the server to perform privileged actions like loading secrets and then switch to an unprivileged user before accepting any requests from the network.

This process happens in the `run_forever()` call. Because daemonization involves `fork()`, certain things like threads created before this call are likely lost.


================================================
FILE: docs/user_guide/error_log.md
================================================
# Error logging

Pingora libraries are built to expect issues like disconnects, timeouts and invalid inputs from the network. A common way to record these issues are to output them in error log (STDERR or log files).

## Log level guidelines
Pingora adopts the idea behind [log](https://docs.rs/log/latest/log/). There are five log levels:
* `error`: This level should be used when the error stops the request from being handled correctly. For example when the server we try to connect to is offline.
* `warning`: This level should be used when an error occurs but the system recovers from it. For example when the primary DNS timed out but the system is able to query the secondary DNS.
* `info`: Pingora logs when the server is starting up or shutting down.
* `debug`: Internal details. This log level is not compiled in `release` builds.
* `trace`: Fine-grained internal details. This log level is not compiled in `release` builds.

The pingora-proxy crate has a well-defined interface to log errors, so that users don't have to manually log common proxy errors. See its guide for more details.


================================================
FILE: docs/user_guide/errors.md
================================================
# How to return errors

For easy error handling, the `pingora-error` crate exports a custom `Result` type used throughout other Pingora crates.

The `Error` struct used in this `Result`'s error variant is a wrapper around arbitrary error types. It allows the user to tag the source of the underlying error and attach other custom context info.

Users will often need to return errors by propagating an existing error or creating a wholly new one. `pingora-error` makes this easy with its error building functions.

## Examples

For example, one could return an error when an expected header is not present:

```rust
fn validate_req_header(req: &RequestHeader) -> Result<()> {
    // validate that the `host` header exists
    req.headers()
        .get(http::header::HOST)
        .ok_or_else(|| Error::explain(InvalidHTTPHeader, "No host header detected"))
}

impl MyServer {
    pub async fn handle_request_filter(
        &self,
        http_session: &mut Session,
        ctx: &mut CTX,
    ) -> Result<bool> {
        validate_req_header(session.req_header()?).or_err(HTTPStatus(400), "Missing required headers")?;
        Ok(true)
    }
}
```

`validate_req_header` returns an `Error` if the `host` header is not found, using `Error::explain` to create a new `Error` along with an associated type (`InvalidHTTPHeader`) and helpful context that may be logged in an error log.

This error will eventually propagate to the request filter, where it is returned as a new `HTTPStatus` error using `or_err`. (As part of the default pingora-proxy `fail_to_proxy()` phase, not only will this error be logged, but it will result in sending a `400 Bad Request` response downstream.)

Note that the original causing error will be visible in the error logs as well. `or_err` wraps the original causing error in a new one with additional context, but `Error`'s `Display` implementation also prints the chain of causing errors.

## Guidelines

An error has a _type_ (e.g. `ConnectionClosed`), a _source_ (e.g. `Upstream`, `Downstream`, `Internal`), and optionally, a _cause_ (another wrapped error) and a _context_ (arbitrary user-provided string details).

A minimal error can be created using functions like `new_in` / `new_up` / `new_down`, each of which specifies a source and asks the user to provide a type.

Generally speaking:
* To create a new error, without a direct cause but with more context, use `Error::explain`. You can also use `explain_err` on a `Result` to replace the potential error inside it with a new one.
* To wrap a causing error in a new one with more context, use `Error::because`. You can also use `or_err` on a `Result` to replace the potential error inside it by wrapping the original one.

## Retry

Errors can be "retry-able." If the error is retry-able, pingora-proxy will be allowed to retry the upstream request. Some errors are only retry-able on [reused connections](pooling.md), e.g. to handle situations where the remote end has dropped a connection we attempted to reuse.

By default a newly created `Error` either takes on its direct causing error's retry status, or, if left unspecified, is considered not retry-able.


================================================
FILE: docs/user_guide/failover.md
================================================
# Handling failures and failover

Pingora-proxy allows users to define how to handle failures throughout the life of a proxied request.

When a failure happens before the response header is sent downstream, users have a few options:
1. Send an error page downstream and then give up.
2. Retry the same upstream again.
3. Try another upstream if applicable.

Otherwise, once the response header is already sent downstream, there is nothing the proxy can do other than logging an error and then giving up on the request.


## Retry / Failover
In order to implement retry or failover, `fail_to_connect()` / `error_while_proxy()` needs to mark the error as "retry-able." For failover, `fail_to_connect() / error_while_proxy()` also needs to update the `CTX` to tell `upstream_peer()` not to use the same `Peer` again.

### Safety
In general, idempotent HTTP requests, e.g., `GET`, are safe to retry. Other requests, e.g., `POST`, are not safe to retry if the requests have already been sent. When `fail_to_connect()` is called, pingora-proxy guarantees that nothing was sent upstream. Users are not recommended to retry a non-idempotent request after `error_while_proxy()` unless they know the upstream server enough to know whether it is safe.

### Example
In the following example we set a `tries` variable on the `CTX` to track how many connection attempts we've made. When setting our peer in `upstream_peer` we check if `tries` is less than one and connect to 192.0.2.1. On connect failure we increment `tries` in `fail_to_connect` and set `e.set_retry(true)` which tells Pingora this is a retryable error. On retry, we enter `upstream_peer` again and this time connect to 1.1.1.1. If we're unable to connect to 1.1.1.1 we return a 502 since we only set `e.set_retry(true)` in `fail_to_connect` when `tries` is zero.

```Rust
pub struct MyProxy();

pub struct MyCtx {
    tries: usize,
}

#[async_trait]
impl ProxyHttp for MyProxy {
    type CTX = MyCtx;
    fn new_ctx(&self) -> Self::CTX {
        MyCtx { tries: 0 }
    }

    fn fail_to_connect(
        &self,
        _session: &mut Session,
        _peer: &HttpPeer,
        ctx: &mut Self::CTX,
        mut e: Box<Error>,
    ) -> Box<Error> {
        if ctx.tries > 0 {
            return e;
        }
        ctx.tries += 1;
        e.set_retry(true);
        e
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let addr = if ctx.tries < 1 {
            ("192.0.2.1", 443)
        } else {
            ("1.1.1.1", 443)
        };

        let mut peer = Box::new(HttpPeer::new(addr, true, "one.one.one.one".to_string()));
        peer.options.connection_timeout = Some(Duration::from_millis(100));
        Ok(peer)
    }
}
```


================================================
FILE: docs/user_guide/graceful.md
================================================
# Graceful restart and shutdown

Graceful restart, upgrade, and shutdown mechanisms are very commonly used to avoid errors or downtime when releasing new versions of Pingora servers.

Pingora graceful upgrade mechanism guarantees the following:
* A request is guaranteed to be handled either by the old server instance or the new one. No request will see connection refused when trying to connect to the server endpoints.
* A request that can finish within the grace period is guaranteed not to be terminated.

## How to graceful upgrade
### Step 0
Configure the upgrade socket. The old and new server need to agree on the same path to this socket. See configuration manual for details.

### Step 1
Start the new instance with the `--upgrade` CLI option. The new instance will not try to listen to the service endpoint right away. It will try to acquire the listening socket from the old instance instead.

### Step 2
Send SIGQUIT signal to the old instance. The old instance will start to transfer the listening socket to the new instance.

Once step 2 is successful, the new instance will start to handle new incoming connections right away. Meanwhile, the old instance will enter its graceful shutdown mode. It waits a short period of time (to give the new instance time to initialize and prepare to handle traffic), after which it will not accept any new connections.


================================================
FILE: docs/user_guide/index.md
================================================
# User Guide

In this guide, we will cover the most used features, operations and settings of Pingora.

## Running Pingora servers
* [Start and stop](start_stop.md)
* [Graceful restart and graceful shutdown](graceful.md)
* [Configuration](conf.md)
* [Daemonization](daemon.md)
* [Systemd integration](systemd.md)
* [Handling panics](panic.md)
* [Error logging](error_log.md)
* [Prometheus](prom.md)

## Building HTTP proxies
* [Life of a request: `pingora-proxy` phases and filters](phase.md)
* [`Peer`: how to connect to upstream](peer.md)
* [Sharing state across phases with `CTX`](ctx.md)
* [How to return errors](errors.md)
* [Examples: take control of the request](modify_filter.md)
* [Connection pooling and reuse](pooling.md)
* [Handling failures and failover](failover.md)
* [RateLimiter quickstart](rate_limiter.md)

## Advanced topics (WIP)
* [Pingora internals](internals.md)
* Using BoringSSL
* User defined configuration
* Pingora async runtime and threading model
* Background Service
* Blocking code in async context
* Tracing


================================================
FILE: docs/user_guide/internals.md
================================================
# Pingora Internals

(Special thanks to [James Munns](https://github.com/jamesmunns) for writing this section)


## Starting the `Server`

The pingora system starts by spawning a *server*. The server is responsible for starting *services*, and listening for termination events.

```
                               ┌───────────┐
                    ┌─────────>│  Service  │
                    │          └───────────┘
┌────────┐          │          ┌───────────┐
│ Server │──Spawns──┼─────────>│  Service  │
└────────┘          │          └───────────┘
                    │          ┌───────────┐
                    └─────────>│  Service  │
                               └───────────┘
```

After spawning the *services*, the server continues to listen to a termination event, which it will propagate to the created services.

## Services

*Services* are entities that handle listening to given sockets, and perform the core functionality. A *service* is tied to a particular protocol and set of options.

> NOTE: there are also "background" services, which just do *stuff*, and aren't necessarily listening to a socket. For now we're just talking about listener services.

Each service has its own threadpool/tokio runtime, with a number of threads based on the configured value. Worker threads are not shared cross-service. Service runtime threadpools may be work-stealing (tokio-default), or non-work-stealing (N isolated single threaded runtimes).

```
┌─────────────────────────┐
│ ┌─────────────────────┐ │
│ │┌─────────┬─────────┐│ │
│ ││  Conn   │  Conn   ││ │
│ │├─────────┼─────────┤│ │
│ ││Endpoint │Endpoint ││ │
│ │├─────────┴─────────┤│ │
│ ││     Listeners     ││ │
│ │├─────────┬─────────┤│ │
│ ││ Worker  │ Worker  ││ │
│ ││ Thread  │ Thread  ││ │
│ │├─────────┴─────────┤│ │
│ ││  Tokio Executor   ││ │
│ │└───────────────────┘│ │
│ └─────────────────────┘ │
│ ┌───────┐               │
└─┤Service├───────────────┘
  └───────┘
```

## Service Listeners

At startup, each Service is assigned a set of downstream endpoints that they listen to. A single service may listen to more than one endpoint. The Server also passes along any relevant configuration, including TLS settings if relevant.

These endpoints are converted into listening sockets, called `TransportStack`s. Each `TransportStack` is assigned to an async task within that service's executor.

```
                                 ┌───────────────────┐
                                 │┌─────────────────┐│    ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐  ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─
 ┌─────────┐                     ││ TransportStack  ││                                ┌────────────────────┐│
┌┤Listeners├────────┐            ││                 ││    │                       │  ││                    │
│└─────────┘        │            ││ (Listener, TLS  │├──────spawn(run_endpoint())────>│ Service<ServerApp> ││
│┌─────────────────┐│            ││    Acceptor,    ││    │                       │  ││                    │
││    Endpoint     ││            ││   UpgradeFDs)   ││                                └────────────────────┘│
││   addr/ports    ││            │├─────────────────┤│    │                       │  │
││ + TLS Settings  ││            ││ TransportStack  ││                                ┌────────────────────┐│
│├─────────────────┤│            ││                 ││    │                       │  ││                    │
││    Endpoint     ││──build()─> ││ (Listener, TLS  │├──────spawn(run_endpoint())────>│ Service<ServerApp> ││
││   addr/ports    ││            ││    Acceptor,    ││    │                       │  ││                    │
││ + TLS Settings  ││            ││   UpgradeFDs)   ││                                └────────────────────┘│
│├─────────────────┤│            │├─────────────────┤│    │                       │  │
││    Endpoint     ││            ││ TransportStack  ││                                ┌────────────────────┐│
││   addr/ports    ││            ││                 ││    │                       │  ││                    │
││ + TLS Settings  ││            ││ (Listener, TLS  │├──────spawn(run_endpoint())────>│ Service<ServerApp> ││
│└─────────────────┘│            ││    Acceptor,    ││    │                       │  ││                    │
└───────────────────┘            ││   UpgradeFDs)   ││                                └────────────────────┘│
                                 │└─────────────────┘│    │ ┌───────────────┐     │  │ ┌──────────────┐
                                 └───────────────────┘     ─│start_service()│─ ─ ─    ─│ Worker Tasks ├ ─ ─ ┘
                                                            └───────────────┘          └──────────────┘
```

## Downstream connection lifecycle

Each service processes incoming connections by spawning a task-per-connection. These connections are held open
as long as there are new events to be handled.

```
                                  ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┐

                                  │  ┌───────────────┐   ┌────────────────┐   ┌─────────────────┐    ┌─────────────┐  │
┌────────────────────┐               │ UninitStream  │   │    Service     │   │       App       │    │  Task Ends  │
│                    │            │  │ ::handshake() │──>│::handle_event()│──>│ ::process_new() │──┬>│             │  │
│ Service<ServerApp> │──spawn()──>   └───────────────┘   └────────────────┘   └─────────────────┘  │ └─────────────┘
│                    │            │                                                    ▲           │                  │
└────────────────────┘                                                                 │         while
                                  │                                                    └─────────reuse                │
                                     ┌───────────────────────────┐
                                  └ ─│  Task on Service Runtime  │─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ┘
                                     └───────────────────────────┘
```

## What is a proxy then?

Interestingly, the `pingora` `Server` itself has no particular notion of a Proxy.

Instead, it only thinks in terms of `Service`s, which are expected to contain a particular implementor of the `ServiceApp` trait.

For example, this is how an `HttpProxy` struct, from the `pingora-proxy` crate, "becomes" a `Service` spawned by the `Server`:

```
┌─────────────┐
│  HttpProxy  │
│  (struct)   │
└─────────────┘
       │
   implements   ┌─────────────┐
       │        │HttpServerApp│
       └───────>│   (trait)   │
                └─────────────┘
                       │
                   implements   ┌─────────────┐
                       │        │  ServerApp  │
                       └───────>│   (trait)   │
                                └─────────────┘
                                       │
                                   contained    ┌─────────────────────┐
                                     within     │                     │
                                       └───────>│ Service<ServiceApp> │
                                                │                     │
                                                └─────────────────────┘
```

Different functionalities and helpers are provided at different layers in this representation.

```
┌─────────────┐        ┌──────────────────────────────────────┐
│  HttpProxy  │        │Handles high level Proxying workflow, │
│  (struct)   │─ ─ ─ ─ │   customizable via ProxyHttp trait   │
└──────┬──────┘        └──────────────────────────────────────┘
       │
┌──────▼──────┐        ┌──────────────────────────────────────┐
│HttpServerApp│        │ Handles selection of H1 vs H2 stream │
│   (trait)   │─ ─ ─ ─ │     handling, incl H2 handshake      │
└──────┬──────┘        └──────────────────────────────────────┘
       │
┌──────▼──────┐        ┌──────────────────────────────────────┐
│  ServerApp  │        │ Handles dispatching of App instances │
│   (trait)   │─ ─ ─ ─ │   as individual tasks, per Session   │
└──────┬──────┘        └──────────────────────────────────────┘
       │
┌──────▼──────┐        ┌──────────────────────────────────────┐
│ Service<A>  │        │ Handles dispatching of App instances │
│  (struct)   │─ ─ ─ ─ │  as individual tasks, per Listener   │
└─────────────┘        └──────────────────────────────────────┘
```

The `HttpProxy` struct handles the high level workflow of proxying an HTTP connection

It uses the `ProxyHttp` (note the flipped wording order!) **trait** to allow customization
at each of the following steps (note: taken from [the phase chart](./phase_chart.md) doc):

```mermaid
 graph TD;
    start("new request")-->request_filter;
    request_filter-->upstream_peer;

    upstream_peer-->Connect{{IO: connect to upstream}};

    Connect--connection success-->connected_to_upstream;
    Connect--connection failure-->fail_to_connect;

    connected_to_upstream-->upstream_request_filter;
    upstream_request_filter --> SendReq{{IO: send request to upstream}};
    SendReq-->RecvResp{{IO: read response from upstream}};
    RecvResp-->upstream_response_filter-->response_filter-->upstream_response_body_filter-->response_body_filter-->logging-->endreq("request done");

    fail_to_connect --can retry-->upstream_peer;
    fail_to_connect --can't retry-->fail_to_proxy--send error response-->logging;

    RecvResp--failure-->IOFailure;
    SendReq--failure-->IOFailure;
    error_while_proxy--can retry-->upstream_peer;
    error_while_proxy--can't retry-->fail_to_proxy;

    request_filter --send response-->logging


    Error>any response filter error]-->error_while_proxy
    IOFailure>IO error]-->error_while_proxy

```

## Zooming out

Before we zoom in, it's probably good to zoom out and remind ourselves how
a proxy generally works:

```
┌────────────┐          ┌─────────────┐         ┌────────────┐
│ Downstream │          │    Proxy    │         │  Upstream  │
│   Client   │─────────>│             │────────>│   Server   │
└────────────┘          └─────────────┘         └────────────┘
```

The proxy will be taking connections from the **Downstream** client, and (if
everything goes right), establishing a connection with the appropriate
**Upstream** server. This selected upstream server is referred to as
the **Peer**.

Once the connection is established, the Downstream and Upstream can communicate
bidirectionally.

So far, the discussion of Server, Services, and Listeners have focused on the LEFT
half of this diagram, handling incoming Downstream connections, and getting it TO
the proxy component.

Next, we'll look at the RIGHT half of this diagram, connecting to Upstreams.

## Managing the Upstream

Connections to Upstream Peers are made through `Connector`s. This is not a specific type or trait, but more
of a "style".

Connectors are responsible for a few things:

* Establishing a connection with a Peer
* Maintaining a connection pool with the Peer, allowing for connection reuse across:
    * Multiple requests from a single downstream client
    * Multiple requests from different downstream clients
* Measuring health of connections, for connections like H2, which perform regular pings
* Handling protocols with multiple poolable layers, like H2
* Caching, if relevant to the protocol and enabled
* Compression, if relevant to the protocol and enabled

Now in context, we can see how each end of the Proxy is handled:

```
┌────────────┐          ┌─────────────┐         ┌────────────┐
│ Downstream │       ┌ ─│─   Proxy  ┌ ┼ ─       │  Upstream  │
│   Client   │─────────>│ │           │──┼─────>│   Server   │
└────────────┘       │  └───────────┼─┘         └────────────┘
                      ─ ─ ┘          ─ ─ ┘
                        ▲              ▲
                     ┌──┘              └──┐
                     │                    │
                ┌ ─ ─ ─ ─ ┐         ┌ ─ ─ ─ ─ ─
                 Listeners           Connectors│
                └ ─ ─ ─ ─ ┘         └ ─ ─ ─ ─ ─
```

## What about multiple peers?

`Connectors` only handle the connection to a single peer, so selecting one of potentially multiple Peers
is actually handled one level up, in the `upstream_peer()` method of the `ProxyHttp` trait.


================================================
FILE: docs/user_guide/modify_filter.md
================================================
# Examples: taking control of the request

In this section we will go through how to route, modify or reject requests.

## Routing
Any information from the request can be used to make routing decision. Pingora doesn't impose any constraints on how users could implement their own routing logic.

In the following example, the proxy sends traffic to 1.0.0.1 only when the request path start with `/family/`. All the other requests are routed to 1.1.1.1.

```Rust
pub struct MyGateway;

#[async_trait]
impl ProxyHttp for MyGateway {
    type CTX = ();
    fn new_ctx(&self) -> Self::CTX {}

    async fn upstream_peer(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let addr = if session.req_header().uri.path().starts_with("/family/") {
            ("1.0.0.1", 443)
        } else {
            ("1.1.1.1", 443)
        };

        info!("connecting to {addr:?}");

        let peer = Box::new(HttpPeer::new(addr, true, "one.one.one.one".to_string()));
        Ok(peer)
    }
}
```


## Modifying headers

Both request and response headers can be added, removed or modified in their corresponding phases. In the following example, we add logic to the `response_filter` phase to update the `Server` header and remove the `alt-svc` header.

```Rust
#[async_trait]
impl ProxyHttp for MyGateway {
    ...
    async fn response_filter(
        &self,
        _session: &mut Session,
        upstream_response: &mut ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        // replace existing header if any
        upstream_response
            .insert_header("Server", "MyGateway")
            .unwrap();
        // because we don't support h3
        upstream_response.remove_header("alt-svc");

        Ok(())
    }
}
```

## Return Error pages

Sometimes instead of proxying the traffic, under certain conditions, such as authentication failures, you might want the proxy to just return an error page.

```Rust
fn check_login(req: &pingora_http::RequestHeader) -> bool {
    // implement you logic check logic here
    req.headers.get("Authorization").map(|v| v.as_bytes()) == Some(b"password")
}

#[async_trait]
impl ProxyHttp for MyGateway {
    ...
    async fn request_filter(&self, session: &mut Session, _ctx: &mut Self::CTX) -> Result<bool> {
        if session.req_header().uri.path().starts_with("/login")
            && !check_login(session.req_header())
        {
            let _ = session.respond_error(403).await;
            // true: tell the proxy that the response is already written
            return Ok(true);
        }
        Ok(false)
    }
```
## Logging

Logging logic can be added to the `logging` phase of Pingora. The logging phase runs on every request right before Pingora proxy finish processing it. This phase runs for both successful and failed requests.

In the example below, we add Prometheus metric and access logging to the proxy. In order for the metrics to be scraped, we also start a Prometheus metric server on a different port.


``` Rust
pub struct MyGateway {
    req_metric: prometheus::IntCounter,
}

#[async_trait]
impl ProxyHttp for MyGateway {
    ...
    async fn logging(
        &self,
        session: &mut Session,
        _e: Option<&pingora::Error>,
        ctx: &mut Self::CTX,
    ) {
        let response_code = session
            .response_written()
            .map_or(0, |resp| resp.status.as_u16());
        // access log
        info!(
            "{} response code: {response_code}",
            self.request_summary(session, ctx)
        );

        self.req_metric.inc();
    }

fn main() {
   ...
    let mut prometheus_service_http =
        pingora::services::listening::Service::prometheus_http_service();
    prometheus_service_http.add_tcp("127.0.0.1:6192");
    my_server.add_service(prometheus_service_http);

    my_server.run_forever();
}
```

================================================
FILE: docs/user_guide/panic.md
================================================
# Handling panics

Any panic that happens to particular requests does not affect other ongoing requests or the server's ability to handle other requests. Sockets acquired by the panicking requests are dropped (closed). The panics will be captured by the tokio runtime and then ignored.

In order to monitor the panics, Pingora server has built-in Sentry integration.
```rust
my_server.sentry = Some(
    sentry::ClientOptions{
        dsn: "SENTRY_DSN".into_dsn().unwrap(),
        ..Default::default()
    }
);
```

Even though a panic is not fatal in Pingora, it is still not the preferred way to handle failures like network timeouts. Panics should be reserved for unexpected logic errors.


================================================
FILE: docs/user_guide/peer.md
================================================
# `Peer`: how to connect to upstream

In the `upstream_peer()` phase the user should return a `Peer` object which defines how to connect to a certain upstream.

## `Peer`
A `HttpPeer` defines which upstream to connect to.
| attribute      | meaning        |
| ------------- |-------------|
|address: `SocketAddr`| The IP:Port to connect to |
|scheme: `Scheme`| Http or Https |
|sni: `String`| The SNI to use, Https only |
|proxy: `Option<Proxy>`| The setting to proxy the request through a [CONNECT proxy](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/CONNECT) |
|client_cert_key: `Option<Arc<CertKey>>`| The client certificate to use in mTLS connections to upstream |
|options: `PeerOptions`| See below |


## `PeerOptions`
A `PeerOptions` defines how to connect to the upstream.
| attribute      | meaning        |
| ------------- |-------------|
|bind_to: `Option<InetSocketAddr>`| Which local address to bind to as the client IP |
|connection_timeout: `Option<Duration>`| How long to wait before giving up *establishing* a TCP connection |
|total_connection_timeout: `Option<Duration>`| How long to wait before giving up *establishing* a connection including TLS handshake time |
|read_timeout: `Option<Duration>`| How long to wait before each individual `read()` from upstream. The timer is reset after each `read()` |
|idle_timeout: `Option<Duration>`| How long to wait before closing a idle connection waiting for connection reuse |
|write_timeout: `Option<Duration>`| How long to wait before a `write()` to upstream finishes |
|verify_cert: `bool`| Whether to check if upstream' server cert is valid and validated |
|verify_hostname: `bool`| Whether to check if upstream server cert's CN matches the SNI |
|use_system_certs: `bool`| Whether the system trust store should be loaded and used when verifying certificates. Impacts performance (s2n-tls only) |
|alternative_cn: `Option<String>`| Accept the cert if the CN matches this name |
|alpn: `ALPN`| Which HTTP protocol to advertise during ALPN, http1.1 and/or http2 |
|ca: `Option<Arc<Box<[X509]>>>`| Which Root CA to use to validate the server's cert |
|psk: `Option<Arc<PskConfig>>` | The PSK configuration to use in [PSK-TLS](https://datatracker.ietf.org/doc/html/rfc4279) handshakes (s2n-tls only) |
|s2n_security_policy: `Option<S2NPolicy>` | S2N [Security Policy](https://aws.github.io/s2n-tls/usage-guide/ch06-security-policies.html) to use. Defaults to `default_tls13` if undefined. (s2n-tls only) |
|max_blinding_delay: `Option<u32>` | S2N-TLS will delay a response up to the [max blinding delay](https://aws.github.io/s2n-tls/usage-guide/ch03-error-handling.html#blinding) (default 30) seconds whenever an error triggered by a peer occurs to mitigate against timing side channels. (s2n-tls only) |
|tcp_keepalive: `Option<TcpKeepalive>`| TCP keepalive settings to upstream |

## Examples
TBD


================================================
FILE: docs/user_guide/phase.md
================================================
# Life of a request: pingora-proxy phases and filters

## Intro
The pingora-proxy HTTP proxy framework supports highly programmable proxy behaviors. This is done by allowing users to inject custom logic into different phases (stages) in the life of a request.

## Life of a proxied HTTP request
1. The life of a proxied HTTP request starts when the proxy reads the request header from the **downstream** (i.e., the client).
2. Then, the proxy connects to the **upstream** (i.e., the remote server). This step is skipped if there is a previously established [connection to reuse](pooling.md).
3. The proxy then sends the request header to the upstream.
4. Once the request header is sent, the proxy enters a duplex mode, which simultaneously proxies:
    a. upstream response (both header and body) to the downstream, and
    b. downstream request body to upstream (if any).
5. Once the entire request/response finishes, the life of the request is ended. All resources are released. The downstream connections and the upstream connections are recycled to be reused if applicable.

## Pingora-proxy phases and filters
Pingora-proxy allows users to insert arbitrary logic into the life of a request.
```mermaid
 graph TD;
    start("new request")-->early_request_filter;
    early_request_filter-->request_filter;
    request_filter-->upstream_peer;

    upstream_peer-->Connect{{IO: connect to upstream}};

    Connect--connection success-->connected_to_upstream;
    Connect--connection failure-->fail_to_connect;

    connected_to_upstream-->upstream_request_filter;
    upstream_request_filter --> request_body_filter;
    request_body_filter --> SendReq{{IO: send request to upstream}};
    SendReq-->RecvResp{{IO: read response from upstream}};
    RecvResp-->upstream_response_filter-->response_filter-->upstream_response_body_filter-->response_body_filter-->logging-->endreq("request done");

    fail_to_connect --can retry-->upstream_peer;
    fail_to_connect --can't retry-->fail_to_proxy--send error response-->logging;

    RecvResp--failure-->IOFailure;
    SendReq--failure-->IOFailure;
    error_while_proxy--can retry-->upstream_peer;
    error_while_proxy--can't retry-->fail_to_proxy;

    request_filter --send response-->logging


    Error>any response filter error]-->error_while_proxy
    IOFailure>IO error]-->error_while_proxy
```

### General filter usage guidelines
* Most filters return a [`pingora_error::Result<_>`](errors.md). When the returned value is `Result::Err`, `fail_to_proxy()` will be called and the request will be terminated.
* Most filters are async functions, which allows other async operations such as IO to be performed within the filters.
* A per-request `CTX` object can be defined to share states across the filters of the same request. All filters have mutable access to this object.
* Most filters are optional.
* The reason both `upstream_response_*_filter()` and `response_*_filter()` exist is for HTTP caching integration reasons (still WIP).


### `early_request_filter()`
This is the first phase of every request.

This function is similar to `request_filter()` but executes before any other logic, including downstream module logic. The main purpose of this function is to provide finer-grained control of the behavior of the modules.

### `request_filter()`
This phase is usually for validating request inputs, rate limiting, and initializing context.

### `request_body_filter()`
This phase is triggered after a request body is ready to send to upstream. It will be called every time a piece of request body is received.

### `proxy_upstream_filter()`
This phase determines if we should continue to the upstream to serve a response. If we short-circuit, a 502 is returned by default, but a different response can be implemented.

This phase returns a boolean determining if we should continue to the upstream or error.

### `upstream_peer()`
This phase decides which upstream to connect to (e.g. with DNS lookup and hashing/round-robin), and how to connect to it.

This phase returns a `Peer` that defines the upstream to connect to. Implementing this phase is **required**.

### `connected_to_upstream()`
This phase is executed when upstream is successfully connected.

Usually this phase is for logging purposes. Connection info such as RTT and upstream TLS ciphers are reported in this phase.

### `fail_to_connect()`
The counterpart of `connected_to_upstream()`. This phase is called if an error is encountered when connecting to upstream.

In this phase users can report the error in Sentry/Prometheus/error log. Users can also decide if the error is retry-able.

If the error is retry-able, `upstream_peer()` will be called again, in which case the user can decide whether to retry the same upstream or failover to a secondary one.

If the error is not retry-able, the request will end.

### `upstream_request_filter()`
This phase is to modify requests before sending to upstream.

### `upstream_response_filter()/upstream_response_body_filter()/upstream_response_trailer_filter()`
This phase is triggered after an upstream response header/body/trailer is received.

This phase is to modify or process response headers, body, or trailers before sending to downstream. Note that this phase is called _prior_ to HTTP caching and therefore any changes made here will affect the response stored in the HTTP cache.

### `response_filter()/response_body_filter()/response_trailer_filter()`
This phase is triggered after a response header/body/trailer is ready to send to downstream.

This phase is to modify them before sending to downstream.

### `error_while_proxy()`
This phase is triggered during proxy errors to upstream, this is after the connection is established.

This phase may decide to retry a request if the connection was re-used and the HTTP method is idempotent.

### `fail_to_proxy()`
This phase is called whenever an error is encounter during any of the phases above.

This phase is usually for error logging and error reporting to downstream.

### `logging()`
This is the last phase that runs after the request is finished (or errors) and before any of its resources are released. Every request will end up in this final phase.

This phase is usually for logging and post request cleanup.

### `request_summary()`
This is not a phase, but a commonly used callback.

Every error that reaches `fail_to_proxy()` will be automatically logged in the error log. `request_summary()` will be called to dump the info regarding the request when logging the error.

This callback returns a string which allows users to customize what info to dump in the error log to help track and debug the failures.

### `suppress_error_log()`
This is also not a phase, but another callback.

`fail_to_proxy()` errors are automatically logged in the error log, but users may not be interested in every error. For example, downstream errors are logged if the client disconnects early, but these errors can become noisy if users are mainly interested in observing upstream issues. This callback can inspect the error and returns true or false. If true, the error will not be written to the log.

### Cache filters

To be documented


================================================
FILE: docs/user_guide/phase_chart.md
================================================
Pingora proxy phases without caching
```mermaid
 graph TD;
    start("new request")-->early_request_filter;
    early_request_filter-->request_filter;
    request_filter-->upstream_peer;

    upstream_peer-->Connect{{IO: connect to upstream}};

    Connect--connection success-->connected_to_upstream;
    Connect--connection failure-->fail_to_connect;

    connected_to_upstream-->upstream_request_filter;
    upstream_request_filter --> request_body_filter;
    request_body_filter --> SendReq{{IO: send request to upstream}};
    SendReq-->RecvResp{{IO: read response from upstream}};
    RecvResp-->upstream_response_filter-->response_filter-->upstream_response_body_filter-->response_body_filter-->logging-->endreq("request done");

    fail_to_connect --can retry-->upstream_peer;
    fail_to_connect --can't retry-->fail_to_proxy--send error response-->logging;

    RecvResp--failure-->IOFailure;
    SendReq--failure-->IOFailure;
    error_while_proxy--can retry-->upstream_peer;
    error_while_proxy--can't retry-->fail_to_proxy;

    request_filter --send response-->logging


    Error>any response filter error]-->error_while_proxy
    IOFailure>IO error]-->error_while_proxy
```

================================================
FILE: docs/user_guide/pooling.md
================================================
# Connection pooling and reuse

When the request to a `Peer` (upstream server) is finished, the connection to that peer is kept alive and added to a connection pool to be _reused_ by subsequent requests. This happens automatically without any special configuration.

Requests that reuse previously established connections avoid the latency and compute cost of setting up a new connection, improving the Pingora server's overall performance and scalability.

## Same `Peer`
Only the connections to the exact same `Peer` can be reused by a request. For correctness and security reasons, two `Peer`s are the same if and only if all the following attributes are the same
* IP:port
* scheme
* SNI
* client cert
* verify cert
* verify hostname
* alternative_cn
* proxy settings

## Disable pooling
To disable connection pooling and reuse to a certain `Peer`, just set the `idle_timeout` to 0 seconds to all requests using that `Peer`.

## Failure
A connection is considered not reusable if errors happen during the request.


================================================
FILE: docs/user_guide/prom.md
================================================
# Prometheus

Pingora has a built-in prometheus HTTP metric server for scraping.

```rust
    ...
    let mut prometheus_service_http = Service::prometheus_http_service();
    prometheus_service_http.add_tcp("0.0.0.0:1234");
    my_server.add_service(prometheus_service_http);
    my_server.run_forever();
```

The simplest way to use it is to have [static metrics](https://docs.rs/prometheus/latest/prometheus/#static-metrics).

```rust
static MY_COUNTER: Lazy<IntGauge> = Lazy::new(|| {
    register_int_gauge!("my_counter", "my counter").unwrap()
});

```

This static metric will automatically appear in the Prometheus metric endpoint.


================================================
FILE: docs/user_guide/rate_limiter.md
================================================
# **RateLimiter quickstart**
Pingora provides a crate `pingora-limits` which provides a simple and easy to use rate limiter for your application. Below is an example of how you can use [`Rate`](https://docs.rs/pingora-limits/latest/pingora_limits/rate/struct.Rate.html) to create an application that uses multiple limiters to restrict the rate at which requests can be made on a per-app basis (determined by a request header).

## Steps
1. Add the following dependencies to your `Cargo.toml`:
   ```toml
   async-trait="0.1"
   pingora = { version = "0.3", features = [ "lb" ] }
   pingora-limits = "0.3.0"
   once_cell = "1.19.0"
   ```
2. Declare a global rate limiter map to store the rate limiter for each client. In this example, we use `appid`.
3. Override the `request_filter` method in the `ProxyHttp` trait to implement rate limiting.
   1. Retrieve the client appid from header.
   2. Retrieve the current window requests from the rate limiter map. If there is no rate limiter for the client, create a new one and insert it into the map.
   3. If the current window requests exceed the limit, return 429 and set RateLimiter associated headers.
   4. If the request is not rate limited, return `Ok(false)` to continue the request.

## Example
```rust
use async_trait::async_trait;
use once_cell::sync::Lazy;
use pingora::prelude::*;
use pingora_limits::rate::Rate;
use std::sync::Arc;
use std::time::Duration;

fn main() {
    let mut server = Server::new(Some(Opt::default())).unwrap();
    server.bootstrap();
    let mut upstreams = LoadBalancer::try_from_iter(["1.1.1.1:443", "1.0.0.1:443"]).unwrap();
    // Set health check
    let hc = TcpHealthCheck::new();
    upstreams.set_health_check(hc);
    upstreams.health_check_frequency = Some(Duration::from_secs(1));
    // Set background service
    let background = background_service("health check", upstreams);
    let upstreams = background.task();
    // Set load balancer
    let mut lb = http_proxy_service(&server.configuration, LB(upstreams));
    lb.add_tcp("0.0.0.0:6188");

    // let rate = Rate
    server.add_service(background);
    server.add_service(lb);
    server.run_forever();
}

pub struct LB(Arc<LoadBalancer<RoundRobin>>);

impl LB {
    pub fn get_request_appid(&self, session: &mut Session) -> Option<String> {
        match session
            .req_header()
            .headers
            .get("appid")
            .map(|v| v.to_str())
        {
            None => None,
            Some(v) => match v {
                Ok(v) => Some(v.to_string()),
                Err(_) => None,
            },
        }
    }
}

// Rate limiter
static RATE_LIMITER: Lazy<Rate> = Lazy::new(|| Rate::new(Duration::from_secs(1)));

// max request per second per client
static MAX_REQ_PER_SEC: isize = 1;

#[async_trait]
impl ProxyHttp for LB {
    type CTX = ();

    fn new_ctx(&self) {}

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let upstream = self.0.select(b"", 256).unwrap();
        // Set SNI
        let peer = Box::new(HttpPeer::new(upstream, true, "one.one.one.one".to_string()));
        Ok(peer)
    }

    async fn upstream_request_filter(
        &self,
        _session: &mut Session,
        upstream_request: &mut RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        upstream_request
            .insert_header("Host", "one.one.one.one")
            .unwrap();
        Ok(())
    }

    async fn request_filter(&self, session: &mut Session, _ctx: &mut Self::CTX) -> Result<bool>
    where
        Self::CTX: Send + Sync,
    {
        let appid = match self.get_request_appid(session) {
            None => return Ok(false), // no client appid found, skip rate limiting
            Some(addr) => addr,
        };

        // retrieve the current window requests
        let curr_window_requests = RATE_LIMITER.observe(&appid, 1);
        if curr_window_requests > MAX_REQ_PER_SEC {
            // rate limited, return 429
            let mut header = ResponseHeader::build(429, None).unwrap();
            header
                .insert_header("X-Rate-Limit-Limit", MAX_REQ_PER_SEC.to_string())
                .unwrap();
            header.insert_header("X-Rate-Limit-Remaining", "0").unwrap();
            header.insert_header("X-Rate-Limit-Reset", "1").unwrap();
            session.set_keepalive(None);
            session
                .write_response_header(Box::new(header), true)
                .await?;
            return Ok(true);
        }
        Ok(false)
    }
}
```

## Testing
To use the example above,

1. Run your program with `cargo run`.
2. Verify the program is working with a few executions of ` curl localhost:6188 -H "appid:1" -v`
   - The first request should work and any later requests that arrive within 1s of a previous request should fail with:
     ```
     *   Trying 127.0.0.1:6188...
     * Connected to localhost (127.0.0.1) port 6188 (#0)
     > GET / HTTP/1.1
     > Host: localhost:6188
     > User-Agent: curl/7.88.1
     > Accept: */*
     > appid:1
     >
     < HTTP/1.1 429 Too Many Requests
     < X-Rate-Limit-Limit: 1
     < X-Rate-Limit-Remaining: 0
     < X-Rate-Limit-Reset: 1
     < Date: Sun, 14 Jul 2024 20:29:02 GMT
     < Connection: close
     <
     * Closing connection 0
     ```

## Complete Example
You can run the pre-made example code in the [`pingora-proxy` examples folder](https://github.com/cloudflare/pingora/tree/main/pingora-proxy/examples/rate_limiter.rs) with

```
cargo run --example rate_limiter
```


================================================
FILE: docs/user_guide/start_stop.md
================================================
# Starting and stopping Pingora server

A pingora server is a regular unprivileged multithreaded process.

## Start
By default, the server will run in the foreground.

A Pingora server by default takes the following command-line arguments:

| Argument      | Effect        | default|
| ------------- |-------------| ----|
| -d, --daemon | Daemonize the server | false |
| -t, --test | Test the server conf and then exit (WIP) | false |
| -c, --conf | The path to the configuration file | empty string |
| -u, --upgrade | This server should gracefully upgrade a running server | false |

## Stop
A Pingora server will listen to the following signals.

### SIGINT: fast shutdown
Upon receiving SIGINT (ctrl + c), the server will exit immediately with no delay. All unfinished requests will be interrupted. This behavior is usually less preferred because it could break requests.

### SIGTERM: graceful shutdown
Upon receiving SIGTERM, the server will notify all its services to shutdown, wait for some preconfigured time and then exit. This behavior gives requests a grace period to finish.

### SIGQUIT: graceful upgrade
Similar to SIGTERM, but the server will also transfer all its listening sockets to a new Pingora server so that there is no downtime during the upgrade. See the [graceful upgrade](graceful.md) section for more details.


================================================
FILE: docs/user_guide/systemd.md
================================================
# Systemd integration

A Pingora server doesn't depend on systemd but it can easily be made into a systemd service.

```ini
[Service]
Type=forking
PIDFile=/run/pingora.pid
ExecStart=/bin/pingora -d -c /etc/pingora.conf
ExecReload=kill -QUIT $MAINPID
ExecReload=/bin/pingora -u -d -c /etc/pingora.conf
```

The example systemd setup integrates Pingora's graceful upgrade into systemd. To upgrade the pingora service, simply install a version of the binary and then call `systemctl reload pingora.service`.


================================================
FILE: pingora/Cargo.toml
================================================
[package]
name = "pingora"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
description = """
A framework to build fast, reliable and programmable networked systems at Internet scale.
"""
categories = ["asynchronous", "network-programming"]
keywords = ["async", "proxy", "http", "pingora"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
name = "pingora"
path = "src/lib.rs"

[package.metadata.docs.rs]
features = ["document-features"]
rustdoc-args = ["--cfg", "docsrs"]

[dependencies]
pingora-core = { version = "0.8.0", path = "../pingora-core", default-features = false }
pingora-http = { version = "0.8.0", path = "../pingora-http" }
pingora-timeout = { version = "0.8.0", path = "../pingora-timeout" }
pingora-load-balancing = { version = "0.8.0", path = "../pingora-load-balancing", optional = true, default-features = false }
pingora-proxy = { version = "0.8.0", path = "../pingora-proxy", optional = true, default-features = false }
pingora-cache = { version = "0.8.0", path = "../pingora-cache", optional = true, default-features = false }

# Only used for documenting features, but doesn't work in any other dependency 
# group :(
document-features = { version = "0.2.10", optional = true }

[dev-dependencies]
clap = { version = "4.5", features = ["derive"] }
tokio = { workspace = true, features = ["rt-multi-thread", "signal"] }
env_logger = "0.11"
reqwest = { version = "0.11", features = ["rustls"], default-features = false }
hyper = "0.14"
async-trait = { workspace = true }
http = { workspace = true }
log = { workspace = true }
prometheus = "0.13"
once_cell = { workspace = true }
bytes = { workspace = true }
regex = "1"

[target.'cfg(unix)'.dev-dependencies]
hyperlocal = "0.8"
jemallocator = "0.5"

[features]
default = []

#! ### Tls
#! Tls is provided by adding one of these features. If no tls-providing feature
#! is added, only unencrypted http. Only one tls-providing feature can be
#! selected at a time

## Use [OpenSSL](https://crates.io/crates/openssl) for tls
##
## Requires native openssl libraries and build tooling
openssl = [
    "pingora-core/openssl",
    "pingora-proxy?/openssl",
    "pingora-cache?/openssl",
    "pingora-load-balancing?/openssl",
    "openssl_derived",
]

## Use [BoringSSL](https://crates.io/crates/boring) for tls 
##
## Requires native boring libraries and build tooling
boringssl = [
    "pingora-core/boringssl",
    "pingora-proxy?/boringssl",
    "pingora-cache?/boringssl",
    "pingora-load-balancing?/boringssl",
    "openssl_derived",
]

## Use  [s2n-tls](https://crates.io/crates/s2n-tls) for tls
##
## Requires native s2n-tls libraries and build tooling
s2n = [
    "pingora-core/s2n",
    "pingora-proxy?/s2n",
    "pingora-cache?/s2n",
    "pingora-load-balancing?/s2n",
    "any_tls",
]

## Use  [rustls](https://crates.io/crates/rustls) for tls 
##
## ⚠️ _Highly Experimental_! ⚠️ Try it, but don't rely on it (yet)
rustls = [
    "pingora-core/rustls",
    "pingora-proxy?/rustls",
    "pingora-cache?/rustls",
    "pingora-load-balancing?/rustls",
    "any_tls",
]

#! ### Pingora extensions

## Include the [proxy](crate::proxy) module
##
## This feature will include and export `pingora_proxy::prelude::*`
proxy = ["pingora-proxy"]

## Include the [lb](crate::lb) (load-balancing) module
##
## This feature will include and export `pingora_load_balancing::prelude::*`
lb = ["pingora-load-balancing", "proxy"]

## Include the [cache](crate::cache) module
##
## This feature will include and export `pingora_cache::prelude::*`
cache = ["pingora-cache"]

## Enable time/scheduling functionality
time = []

## Enable sentry for error notifications
sentry = ["pingora-core/sentry"]

## Enable pre-TLS connection filtering
connection_filter = [
    "pingora-core/connection_filter",
    "pingora-proxy?/connection_filter",
]


# These features are intentionally not documented
openssl_derived = ["any_tls"]
any_tls = []
patched_http1 = ["pingora-core/patched_http1"]
document-features = [
    "dep:document-features",
    "proxy",
    "lb",
    "cache",
    "time",
    "sentry",
    "connection_filter"
]


================================================
FILE: pingora/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora/examples/app/echo.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use bytes::Bytes;
use http::{Response, StatusCode};
use log::debug;
use once_cell::sync::Lazy;
use pingora_timeout::timeout;
use prometheus::{register_int_counter, IntCounter};
use std::sync::Arc;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};

use pingora::apps::http_app::ServeHttp;
use pingora::apps::ServerApp;
use pingora::protocols::http::ServerSession;
use pingora::protocols::Stream;
use pingora::server::ShutdownWatch;

static REQ_COUNTER: Lazy<IntCounter> =
    Lazy::new(|| register_int_counter!("reg_counter", "Number of requests").unwrap());

#[derive(Clone)]
pub struct EchoApp;

#[async_trait]
impl ServerApp for EchoApp {
    async fn process_new(
        self: &Arc<Self>,
        mut io: Stream,
        _shutdown: &ShutdownWatch,
    ) -> Option<Stream> {
        let mut buf = [0; 1024];
        loop {
            let n = io.read(&mut buf).await.unwrap();
            if n == 0 {
                debug!("session closing");
                return None;
            }
            io.write_all(&buf[0..n]).await.unwrap();
            io.flush().await.unwrap();
        }
    }
}

pub struct HttpEchoApp;

#[async_trait]
impl ServeHttp for HttpEchoApp {
    async fn response(&self, http_stream: &mut ServerSession) -> Response<Vec<u8>> {
        REQ_COUNTER.inc();
        // read timeout of 2s
        let read_timeout = 2000;
        let body = match timeout(
            Duration::from_millis(read_timeout),
            http_stream.read_request_body(),
        )
        .await
        {
            Ok(res) => match res.unwrap() {
                Some(bytes) => bytes,
                None => Bytes::from("no body!"),
            },
            Err(_) => {
                panic!("Timed out after {:?}ms", read_timeout);
            }
        };

        Response::builder()
            .status(StatusCode::OK)
            .header(http::header::CONTENT_TYPE, "text/html")
            .header(http::header::CONTENT_LENGTH, body.len())
            .body(body.to_vec())
            .unwrap()
    }
}


================================================
FILE: pingora/examples/app/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod echo;
pub mod proxy;


================================================
FILE: pingora/examples/app/proxy.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use log::debug;

use std::sync::Arc;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::select;

use pingora::apps::ServerApp;
use pingora::connectors::TransportConnector;
use pingora::protocols::Stream;
use pingora::server::ShutdownWatch;
use pingora::upstreams::peer::BasicPeer;

pub struct ProxyApp {
    client_connector: TransportConnector,
    proxy_to: BasicPeer,
}

enum DuplexEvent {
    DownstreamRead(usize),
    UpstreamRead(usize),
}

impl ProxyApp {
    pub fn new(proxy_to: BasicPeer) -> Self {
        ProxyApp {
            client_connector: TransportConnector::new(None),
            proxy_to,
        }
    }

    async fn duplex(&self, mut server_session: Stream, mut client_session: Stream) {
        let mut upstream_buf = [0; 1024];
        let mut downstream_buf = [0; 1024];
        loop {
            let downstream_read = server_session.read(&mut upstream_buf);
            let upstream_read = client_session.read(&mut downstream_buf);
            let event: DuplexEvent;
            select! {
                n = downstream_read => event
                    = DuplexEvent::DownstreamRead(n.unwrap()),
                n = upstream_read => event
                    = DuplexEvent::UpstreamRead(n.unwrap()),
            }
            match event {
                DuplexEvent::DownstreamRead(0) => {
                    debug!("downstream session closing");
                    return;
                }
                DuplexEvent::UpstreamRead(0) => {
                    debug!("upstream session closing");
                    return;
                }
                DuplexEvent::DownstreamRead(n) => {
                    client_session.write_all(&upstream_buf[0..n]).await.unwrap();
                    client_session.flush().await.unwrap();
                }
                DuplexEvent::UpstreamRead(n) => {
                    server_session
                        .write_all(&downstream_buf[0..n])
                        .await
                        .unwrap();
                    server_session.flush().await.unwrap();
                }
            }
        }
    }
}

#[async_trait]
impl ServerApp for ProxyApp {
    async fn process_new(
        self: &Arc<Self>,
        io: Stream,
        _shutdown: &ShutdownWatch,
    ) -> Option<Stream> {
        let client_session = self.client_connector.new_stream(&self.proxy_to).await;

        match client_session {
            Ok(client_session) => {
                self.duplex(io, client_session).await;
                None
            }
            Err(e) => {
                debug!("Failed to create client session: {}", e);
                None
            }
        }
    }
}


================================================
FILE: pingora/examples/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use pingora::{connectors::http::Connector, prelude::*};
use regex::Regex;

#[tokio::main]
async fn main() -> Result<()> {
    let connector = Connector::new(None);

    // create the HTTP session
    let peer_addr = "1.1.1.1:443";
    let mut peer = HttpPeer::new(peer_addr, true, "one.one.one.one".into());
    peer.options.set_http_version(2, 1);
    let (mut http, _reused) = connector.get_http_session(&peer).await?;

    // perform a GET request
    let mut new_request = RequestHeader::build("GET", b"/", None)?;
    new_request.insert_header("Host", "one.one.one.one")?;
    http.write_request_header(Box::new(new_request)).await?;

    // Servers usually don't respond until the full request body is read.
    http.finish_request_body().await?;
    http.read_response_header().await?;

    // display the headers from the response
    if let Some(header) = http.response_header() {
        println!("{header:#?}");
    } else {
        return Error::e_explain(ErrorType::InvalidHTTPHeader, "No response header");
    };

    // collect the response body
    let mut response_body = String::new();
    while let Some(chunk) = http.read_response_body().await? {
        response_body.push_str(&String::from_utf8_lossy(&chunk));
    }

    // verify that the response body is valid HTML by displaying the page <title>
    let re = Regex::new(r"<title>(.*?)</title>")
        .or_err(ErrorType::InternalError, "Failed to compile regex")?;
    if let Some(title) = re
        .captures(&response_body)
        .and_then(|caps| caps.get(1).map(|match_| match_.as_str()))
    {
        println!("Page Title: {title}");
    } else {
        return Error::e_explain(
            ErrorType::new("InvalidHTML"),
            "No <title> found in response body",
        );
    }

    // gracefully release the connection
    connector
        .release_http_session(http, &peer, Some(std::time::Duration::from_secs(5)))
        .await;

    Ok(())
}


================================================
FILE: pingora/examples/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[global_allocator]
static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc;

use pingora::listeners::tls::TlsSettings;
use pingora::protocols::TcpKeepalive;
use pingora::server::configuration::Opt;
use pingora::server::{Server, ShutdownWatch};
use pingora::services::background::{background_service, BackgroundService};
use pingora::services::listening::Service as ListeningService;
use pingora::services::ServiceWithDependents;

use async_trait::async_trait;
use clap::Parser;
use tokio::time::interval;

use std::time::Duration;

mod app;
mod service;

pub struct ExampleBackgroundService;
#[async_trait]
impl BackgroundService for ExampleBackgroundService {
    async fn start(&self, mut shutdown: ShutdownWatch) {
        let mut period = interval(Duration::from_secs(1));
        loop {
            tokio::select! {
                _ = shutdown.changed() => {
                    // shutdown
                    break;
                }
                _ = period.tick() => {
                    // do some work
                    // ...
                }
            }
        }
    }
}
#[cfg(feature = "openssl_derived")]
mod boringssl_openssl {
    use super::*;
    use pingora::tls::pkey::{PKey, Private};
    use pingora::tls::x509::X509;

    pub(super) struct DynamicCert {
        cert: X509,
        key: PKey<Private>,
    }

    impl DynamicCert {
        pub(super) fn new(cert: &str, key: &str) -> Box<Self> {
            let cert_bytes = std::fs::read(cert).unwrap();
            let cert = X509::from_pem(&cert_bytes).unwrap();

            let key_bytes = std::fs::read(key).unwrap();
            let key = PKey::private_key_from_pem(&key_bytes).unwrap();
            Box::new(DynamicCert { cert, key })
        }
    }

    #[async_trait]
    impl pingora::listeners::TlsAccept for DynamicCert {
        async fn certificate_callback(&self, ssl: &mut pingora::tls::ssl::SslRef) {
            use pingora::tls::ext;
            ext::ssl_use_certificate(ssl, &self.cert).unwrap();
            ext::ssl_use_private_key(ssl, &self.key).unwrap();
        }
    }
}

const USAGE: &str = r#"
Usage
port 6142: TCP echo server
nc 127.0.0.1 6142

port 6143: TLS echo server
openssl s_client -connect 127.0.0.1:6143

port 6145: Http echo server
curl http://127.0.0.1:6145 -v -d 'hello'

port 6148: Https echo server
curl https://127.0.0.1:6148 -vk -d 'hello'

port 6141: TCP proxy
curl http://127.0.0.1:6141 -v -H 'host: 1.1.1.1'

port 6144: TLS proxy
curl https://127.0.0.1:6144 -vk -H 'host: one.one.one.one' -o /dev/null

port 6150: metrics endpoint
curl http://127.0.0.1:6150
"#;

pub fn main() {
    env_logger::init();

    print!("{USAGE}");

    let opt = Some(Opt::parse());
    let mut my_server = Server::new(opt).unwrap();
    my_server.bootstrap();

    let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
    let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));

    let mut echo_service = service::echo::echo_service();
    echo_service.add_tcp("127.0.0.1:6142");
    echo_service
        .add_tls("0.0.0.0:6143", &cert_path, &key_path)
        .unwrap();

    let mut echo_service_http = service::echo::echo_service_http();

    let mut options = pingora::listeners::TcpSocketOptions::default();
    options.tcp_fastopen = Some(10);
    options.tcp_keepalive = Some(TcpKeepalive {
        idle: Duration::from_secs(60),
        interval: Duration::from_secs(5),
        count: 5,
        #[cfg(target_os = "linux")]
        user_timeout: Duration::from_secs(85),
    });

    echo_service_http.add_tcp_with_settings("0.0.0.0:6145", options);
    echo_service_http.add_uds("/tmp/echo.sock", None);

    let mut tls_settings;

    // NOTE: dynamic certificate callback is only supported with BoringSSL/OpenSSL
    #[cfg(feature = "openssl_derived")]
    {
        use std::ops::DerefMut;

        let dynamic_cert = boringssl_openssl::DynamicCert::new(&cert_path, &key_path);
        tls_settings = TlsSettings::with_callbacks(dynamic_cert).unwrap();
        // by default intermediate supports both TLS 1.2 and 1.3. We force to tls 1.2 just for the demo

        tls_settings
            .deref_mut()
            .deref_mut()
            .set_max_proto_version(Some(pingora::tls::ssl::SslVersion::TLS1_2))
            .unwrap();
    }
    #[cfg(feature = "rustls")]
    {
        tls_settings = TlsSettings::intermediate(&cert_path, &key_path).unwrap();
    }
    #[cfg(feature = "s2n")]
    {
        tls_settings = TlsSettings::intermediate(&cert_path, &key_path).unwrap();
    }
    #[cfg(not(feature = "any_tls"))]
    {
        tls_settings = TlsSettings;
    }

    tls_settings.enable_h2();
    echo_service_http.add_tls_with_settings("0.0.0.0:6148", None, tls_settings);

    let proxy_service = service::proxy::proxy_service(
        "0.0.0.0:6141", // listen
        "1.1.1.1:80",   // proxy to
    );

    let proxy_service_ssl = service::proxy::proxy_service_tls(
        "0.0.0.0:6144",    // listen
        "1.1.1.1:443",     // proxy to
        "one.one.one.one", // SNI
        &cert_path,
        &key_path,
    );

    let mut prometheus_service_http = ListeningService::prometheus_http_service();
    prometheus_service_http.add_tcp("127.0.0.1:6150");

    let background_service = background_service("example", ExampleBackgroundService {});

    let services: Vec<Box<dyn ServiceWithDependents>> = vec![
        Box::new(echo_service),
        Box::new(echo_service_http),
        Box::new(proxy_service),
        Box::new(proxy_service_ssl),
        Box::new(prometheus_service_http),
        Box::new(background_service),
    ];
    my_server.add_services(services);
    my_server.run_forever();
}


================================================
FILE: pingora/examples/service/echo.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::app::echo::{EchoApp, HttpEchoApp};
use pingora::apps::http_app::HttpServer;
use pingora::services::listening::Service;

pub fn echo_service() -> Service<EchoApp> {
    Service::new("Echo Service".to_string(), EchoApp)
}

pub fn echo_service_http() -> Service<HttpServer<HttpEchoApp>> {
    let server = HttpServer::new_app(HttpEchoApp);
    Service::new("Echo Service HTTP".to_string(), server)
}


================================================
FILE: pingora/examples/service/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod echo;
pub mod proxy;


================================================
FILE: pingora/examples/service/proxy.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::app::proxy::ProxyApp;
use pingora_core::listeners::Listeners;
use pingora_core::services::listening::Service;
use pingora_core::upstreams::peer::BasicPeer;

pub fn proxy_service(addr: &str, proxy_addr: &str) -> Service<ProxyApp> {
    let proxy_to = BasicPeer::new(proxy_addr);

    Service::with_listeners(
        "Proxy Service".to_string(),
        Listeners::tcp(addr),
        ProxyApp::new(proxy_to),
    )
}

pub fn proxy_service_tls(
    addr: &str,
    proxy_addr: &str,
    proxy_sni: &str,
    cert_path: &str,
    key_path: &str,
) -> Service<ProxyApp> {
    let mut proxy_to = BasicPeer::new(proxy_addr);
    // set SNI to enable TLS
    proxy_to.sni = proxy_sni.into();
    Service::with_listeners(
        "Proxy Service TLS".to_string(),
        Listeners::tls(addr, cert_path, key_path).unwrap(),
        ProxyApp::new(proxy_to),
    )
}


================================================
FILE: pingora/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![warn(clippy::all)]
#![allow(clippy::new_without_default)]
#![allow(clippy::type_complexity)]
#![allow(clippy::match_wild_err_arm)]
#![allow(clippy::missing_safety_doc)]
#![allow(clippy::upper_case_acronyms)]
// This enables the feature that labels modules that are only available with
// certain pingora features
#![cfg_attr(docsrs, feature(doc_cfg))]

//! # Pingora
//!
//! Pingora is a framework to build fast, reliable and programmable networked systems at Internet scale.
//!
//! # Features
//! - Http 1.x and Http 2
//! - Modern TLS with OpenSSL or BoringSSL (FIPS compatible)
//! - Zero downtime upgrade
//!
//! # Usage
//! This crate provides low level service and protocol implementation and abstraction.
//!
//! If looking to build a (reverse) proxy, see [`pingora-proxy`](https://docs.rs/pingora-proxy) crate.
//!
//! # Feature flags
#![cfg_attr(
    feature = "document-features",
    cfg_attr(doc, doc = ::document_features::document_features!())
)]

pub use pingora_core::*;

/// HTTP header objects that preserve http header cases
pub mod http {
    pub use pingora_http::*;
}

#[cfg(feature = "cache")]
#[cfg_attr(docsrs, doc(cfg(feature = "cache")))]
/// Caching services and tooling
pub mod cache {
    pub use pingora_cache::*;
}

#[cfg(feature = "lb")]
#[cfg_attr(docsrs, doc(cfg(feature = "lb")))]
/// Load balancing recipes
pub mod lb {
    pub use pingora_load_balancing::*;
}

#[cfg(feature = "proxy")]
#[cfg_attr(docsrs, doc(cfg(feature = "proxy")))]
/// Proxying recipes
pub mod proxy {
    pub use pingora_proxy::*;
}

#[cfg(feature = "time")]
#[cfg_attr(docsrs, doc(cfg(feature = "time")))]
/// Timeouts and other useful time utilities
pub mod time {
    pub use pingora_timeout::*;
}

/// A useful set of types for getting started
pub mod prelude {
    pub use pingora_core::prelude::*;
    pub use pingora_http::prelude::*;
    pub use pingora_timeout::*;

    #[cfg(feature = "cache")]
    #[cfg_attr(docsrs, doc(cfg(feature = "cache")))]
    pub use pingora_cache::prelude::*;

    #[cfg(feature = "lb")]
    #[cfg_attr(docsrs, doc(cfg(feature = "lb")))]
    pub use pingora_load_balancing::prelude::*;

    #[cfg(feature = "proxy")]
    #[cfg_attr(docsrs, doc(cfg(feature = "proxy")))]
    pub use pingora_proxy::prelude::*;

    #[cfg(feature = "time")]
    #[cfg_attr(docsrs, doc(cfg(feature = "time")))]
    pub use pingora_timeout::*;
}


================================================
FILE: pingora/tests/pingora_conf.yaml
================================================
---
version: 1
client_bind_to_ipv4:
    - 127.0.0.2
ca_file: tests/keys/server.crt

================================================
FILE: pingora-boringssl/Cargo.toml
================================================
[package]
name = "pingora-boringssl"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "tls", "ssl", "pingora"]
description = """
BoringSSL async APIs for Pingora.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_boringssl"
path = "src/lib.rs"

[dependencies]
boring = { version = "4.5", features = ["pq-experimental"] }
boring-sys = "4.5"
futures-util = { version = "0.3", default-features = false }
tokio = { workspace = true, features = ["io-util", "net", "macros", "rt-multi-thread"] }
libc = "0.2.70"
foreign-types-shared = { version = "0.3" }


[dev-dependencies]
tokio-test = "0.4"
tokio = { workspace = true, features = ["full"] }

[features]
default = []
pq_use_second_keyshare = []
# waiting for boring-rs release
read_uninit = []


================================================
FILE: pingora-boringssl/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-boringssl/src/boring_tokio.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This file reimplements tokio-boring with the [overhauled](https://github.com/sfackler/tokio-openssl/commit/56f6618ab619f3e431fa8feec2d20913bf1473aa)
//! tokio-openssl interface while the tokio APIs from official [boring] crate is not yet caught up to it.

use boring::error::ErrorStack;
use boring::ssl::{self, ErrorCode, ShutdownResult, Ssl, SslRef, SslStream as SslStreamCore};
use futures_util::future;
use std::fmt;
use std::io::{self, Read, Write};
use std::pin::Pin;
use std::task::{Context, Poll};
use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};

struct StreamWrapper<S> {
    stream: S,
    context: usize,
}

impl<S> fmt::Debug for StreamWrapper<S>
where
    S: fmt::Debug,
{
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Debug::fmt(&self.stream, fmt)
    }
}

impl<S> StreamWrapper<S> {
    /// # Safety
    ///
    /// Must be called with `context` set to a valid pointer to a live `Context` object, and the
    /// wrapper must be pinned in memory.
    unsafe fn parts(&mut self) -> (Pin<&mut S>, &mut Context<'_>) {
        debug_assert_ne!(self.context, 0);
        let stream = Pin::new_unchecked(&mut self.stream);
        let context = &mut *(self.context as *mut _);
        (stream, context)
    }
}

impl<S> Read for StreamWrapper<S>
where
    S: AsyncRead,
{
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let (stream, cx) = unsafe { self.parts() };
        let mut buf = ReadBuf::new(buf);
        match stream.poll_read(cx, &mut buf)? {
            Poll::Ready(()) => Ok(buf.filled().len()),
            Poll::Pending => Err(io::Error::from(io::ErrorKind::WouldBlock)),
        }
    }
}

impl<S> Write for StreamWrapper<S>
where
    S: AsyncWrite,
{
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let (stream, cx) = unsafe { self.parts() };
        match stream.poll_write(cx, buf) {
            Poll::Ready(r) => r,
            Poll::Pending => Err(io::Error::from(io::ErrorKind::WouldBlock)),
        }
    }

    fn flush(&mut self) -> io::Result<()> {
        let (stream, cx) = unsafe { self.parts() };
        match stream.poll_flush(cx) {
            Poll::Ready(r) => r,
            Poll::Pending => Err(io::Error::from(io::ErrorKind::WouldBlock)),
        }
    }
}

fn cvt<T>(r: io::Result<T>) -> Poll<io::Result<T>> {
    match r {
        Ok(v) => Poll::Ready(Ok(v)),
        Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => Poll::Pending,
        Err(e) => Poll::Ready(Err(e)),
    }
}

fn cvt_ossl<T>(r: Result<T, ssl::Error>) -> Poll<Result<T, ssl::Error>> {
    match r {
        Ok(v) => Poll::Ready(Ok(v)),
        Err(e) => match e.code() {
            ErrorCode::WANT_READ | ErrorCode::WANT_WRITE => Poll::Pending,
            _ => Poll::Ready(Err(e)),
        },
    }
}

/// An asynchronous version of [`boring::ssl::SslStream`].
#[derive(Debug)]
pub struct SslStream<S>(SslStreamCore<StreamWrapper<S>>);

impl<S: AsyncRead + AsyncWrite> SslStream<S> {
    /// Like [`SslStream::new`](ssl::SslStream::new).
    pub fn new(ssl: Ssl, stream: S) -> Result<Self, ErrorStack> {
        SslStreamCore::new(ssl, StreamWrapper { stream, context: 0 }).map(SslStream)
    }

    /// Like [`SslStream::connect`](ssl::SslStream::connect).
    pub fn poll_connect(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
    ) -> Poll<Result<(), ssl::Error>> {
        self.with_context(cx, |s| cvt_ossl(s.connect()))
    }

    /// A convenience method wrapping [`poll_connect`](Self::poll_connect).
    pub async fn connect(mut self: Pin<&mut Self>) -> Result<(), ssl::Error> {
        future::poll_fn(|cx| self.as_mut().poll_connect(cx)).await
    }

    /// Like [`SslStream::accept`](ssl::SslStream::accept).
    pub fn poll_accept(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Result<(), ssl::Error>> {
        self.with_context(cx, |s| cvt_ossl(s.accept()))
    }

    /// A convenience method wrapping [`poll_accept`](Self::poll_accept).
    pub async fn accept(mut self: Pin<&mut Self>) -> Result<(), ssl::Error> {
        future::poll_fn(|cx| self.as_mut().poll_accept(cx)).await
    }

    /// Like [`SslStream::do_handshake`](ssl::SslStream::do_handshake).
    pub fn poll_do_handshake(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
    ) -> Poll<Result<(), ssl::Error>> {
        self.with_context(cx, |s| cvt_ossl(s.do_handshake()))
    }

    /// A convenience method wrapping [`poll_do_handshake`](Self::poll_do_handshake).
    pub async fn do_handshake(mut self: Pin<&mut Self>) -> Result<(), ssl::Error> {
        future::poll_fn(|cx| self.as_mut().poll_do_handshake(cx)).await
    }

    // TODO: early data
}

impl<S> SslStream<S> {
    /// Returns a shared reference to the `Ssl` object associated with this stream.
    pub fn ssl(&self) -> &SslRef {
        self.0.ssl()
    }

    /// Returns a shared reference to the underlying stream.
    pub fn get_ref(&self) -> &S {
        &self.0.get_ref().stream
    }

    /// Returns a mutable reference to the underlying stream.
    pub fn get_mut(&mut self) -> &mut S {
        &mut self.0.get_mut().stream
    }

    /// Returns a pinned mutable reference to the underlying stream.
    pub fn get_pin_mut(self: Pin<&mut Self>) -> Pin<&mut S> {
        unsafe { Pin::new_unchecked(&mut self.get_unchecked_mut().0.get_mut().stream) }
    }

    fn with_context<F, R>(self: Pin<&mut Self>, ctx: &mut Context<'_>, f: F) -> R
    where
        F: FnOnce(&mut SslStreamCore<StreamWrapper<S>>) -> R,
    {
        let this = unsafe { self.get_unchecked_mut() };
        this.0.get_mut().context = ctx as *mut _ as usize;
        let r = f(&mut this.0);
        this.0.get_mut().context = 0;
        r
    }
}

#[cfg(feature = "read_uninit")]
impl<S> AsyncRead for SslStream<S>
where
    S: AsyncRead + AsyncWrite,
{
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        self.with_context(ctx, |s| {
            // SAFETY: read_uninit does not de-initialize the buffer.
            match cvt(s.read_uninit(unsafe { buf.unfilled_mut() }))? {
                Poll::Ready(nread) => {
                    unsafe {
                        buf.assume_init(nread);
                    }
                    buf.advance(nread);
                    Poll::Ready(Ok(()))
                }
                Poll::Pending => Poll::Pending,
            }
        })
    }
}

#[cfg(not(feature = "read_uninit"))]
impl<S> AsyncRead for SslStream<S>
where
    S: AsyncRead + AsyncWrite,
{
    fn poll_read(
        self: Pin<&mut Self>,
        ctx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        self.with_context(ctx, |s| {
            // This isn't really "proper", but rust-openssl doesn't currently expose a suitable interface even though
            // OpenSSL itself doesn't require the buffer to be initialized. So this is good enough for now.
            let slice = unsafe {
                let buf = buf.unfilled_mut();
                std::slice::from_raw_parts_mut(buf.as_mut_ptr().cast::<u8>(), buf.len())
            };
            match cvt(s.read(slice))? {
                Poll::Ready(nread) => {
                    unsafe {
                        buf.assume_init(nread);
                    }
                    buf.advance(nread);
                    Poll::Ready(Ok(()))
                }
                Poll::Pending => Poll::Pending,
            }
        })
    }
}

impl<S> AsyncWrite for SslStream<S>
where
    S: AsyncRead + AsyncWrite,
{
    fn poll_write(self: Pin<&mut Self>, ctx: &mut Context, buf: &[u8]) -> Poll<io::Result<usize>> {
        self.with_context(ctx, |s| cvt(s.write(buf)))
    }

    fn poll_flush(self: Pin<&mut Self>, ctx: &mut Context) -> Poll<io::Result<()>> {
        self.with_context(ctx, |s| cvt(s.flush()))
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, ctx: &mut Context) -> Poll<io::Result<()>> {
        match self.as_mut().with_context(ctx, |s| s.shutdown()) {
            Ok(ShutdownResult::Sent) | Ok(ShutdownResult::Received) => {}
            Err(ref e) if e.code() == ErrorCode::ZERO_RETURN => {}
            Err(ref e) if e.code() == ErrorCode::WANT_READ || e.code() == ErrorCode::WANT_WRITE => {
                return Poll::Pending;
            }
            Err(e) => {
                return Poll::Ready(Err(e.into_io_error().unwrap_or_else(io::Error::other)));
            }
        }

        self.get_pin_mut().poll_shutdown(ctx)
    }
}

#[tokio::test]
async fn test_google() {
    use boring::ssl;
    use std::net::ToSocketAddrs;
    use std::pin::Pin;
    use tokio::io::{AsyncReadExt, AsyncWriteExt};
    use tokio::net::TcpStream;

    let addr = "8.8.8.8:443".to_socket_addrs().unwrap().next().unwrap();
    let stream = TcpStream::connect(&addr).await.unwrap();

    let ssl_context = ssl::SslContext::builder(ssl::SslMethod::tls())
        .unwrap()
        .build();
    let ssl = ssl::Ssl::new(&ssl_context).unwrap();
    let mut stream = crate::tokio_ssl::SslStream::new(ssl, stream).unwrap();

    Pin::new(&mut stream).connect().await.unwrap();

    stream.write_all(b"GET / HTTP/1.0\r\n\r\n").await.unwrap();

    let mut buf = vec![];
    stream.read_to_end(&mut buf).await.unwrap();
    let response = String::from_utf8_lossy(&buf);
    let response = response.trim_end();

    // any response code is fine
    assert!(response.starts_with("HTTP/1.0 "));
    assert!(response.ends_with("</html>") || response.ends_with("</HTML>"));
}


================================================
FILE: pingora-boringssl/src/ext.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! the extended functionalities that are yet exposed via the [`boring`] APIs

use boring::error::ErrorStack;
use boring::pkey::{HasPrivate, PKeyRef};
use boring::ssl::{Ssl, SslAcceptor, SslRef};
use boring::x509::store::X509StoreRef;
use boring::x509::verify::X509VerifyParamRef;
use boring::x509::X509Ref;
use foreign_types_shared::ForeignTypeRef;
use libc::*;
use std::ffi::CString;

fn cvt(r: c_int) -> Result<c_int, ErrorStack> {
    if r != 1 {
        Err(ErrorStack::get())
    } else {
        Ok(r)
    }
}

/// Add name as an additional reference identifier that can match the peer's certificate
///
/// See [X509_VERIFY_PARAM_set1_host](https://www.openssl.org/docs/man3.1/man3/X509_VERIFY_PARAM_set1_host.html).
pub fn add_host(verify_param: &mut X509VerifyParamRef, host: &str) -> Result<(), ErrorStack> {
    if host.is_empty() {
        return Ok(());
    }
    unsafe {
        cvt(boring_sys::X509_VERIFY_PARAM_add1_host(
            verify_param.as_ptr(),
            host.as_ptr() as *const _,
            host.len(),
        ))
        .map(|_| ())
    }
}

/// Set the verify cert store of `ssl`
///
/// See [SSL_set1_verify_cert_store](https://www.openssl.org/docs/man1.1.1/man3/SSL_set1_verify_cert_store.html).
pub fn ssl_set_verify_cert_store(
    ssl: &mut SslRef,
    cert_store: &X509StoreRef,
) -> Result<(), ErrorStack> {
    unsafe {
        cvt(boring_sys::SSL_set1_verify_cert_store(
            ssl.as_ptr(),
            cert_store.as_ptr(),
        ))?;
    }
    Ok(())
}

/// Load the certificate into `ssl`
///
/// See [SSL_use_certificate](https://www.openssl.org/docs/man1.1.1/man3/SSL_use_certificate.html).
pub fn ssl_use_certificate(ssl: &mut SslRef, cert: &X509Ref) -> Result<(), ErrorStack> {
    unsafe {
        cvt(boring_sys::SSL_use_certificate(ssl.as_ptr(), cert.as_ptr()))?;
    }
    Ok(())
}

/// Load the private key into `ssl`
///
/// See [SSL_use_certificate](https://www.openssl.org/docs/man1.1.1/man3/SSL_use_PrivateKey.html).
pub fn ssl_use_private_key<T>(ssl: &mut SslRef, key: &PKeyRef<T>) -> Result<(), ErrorStack>
where
    T: HasPrivate,
{
    unsafe {
        cvt(boring_sys::SSL_use_PrivateKey(ssl.as_ptr(), key.as_ptr()))?;
    }
    Ok(())
}

/// Add the certificate into the cert chain of `ssl`
///
/// See [SSL_add1_chain_cert](https://www.openssl.org/docs/man1.1.1/man3/SSL_add1_chain_cert.html)
pub fn ssl_add_chain_cert(ssl: &mut SslRef, cert: &X509Ref) -> Result<(), ErrorStack> {
    unsafe {
        cvt(boring_sys::SSL_add1_chain_cert(ssl.as_ptr(), cert.as_ptr()))?;
    }
    Ok(())
}

/// Set renegotiation
///
/// This function is specific to BoringSSL
/// See <https://commondatastorage.googleapis.com/chromium-boringssl-docs/ssl.h.html#SSL_set_renegotiate_mode>
pub fn ssl_set_renegotiate_mode_freely(ssl: &mut SslRef) {
    unsafe {
        boring_sys::SSL_set_renegotiate_mode(
            ssl.as_ptr(),
            boring_sys::ssl_renegotiate_mode_t::ssl_renegotiate_freely,
        );
    }
}

/// Set the curves/groups of `ssl`
///
/// See [set_groups_list](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set1_curves.html).
pub fn ssl_set_groups_list(ssl: &mut SslRef, groups: &str) -> Result<(), ErrorStack> {
    let groups = CString::new(groups).unwrap();
    unsafe {
        // somehow SSL_set1_groups_list doesn't exist but SSL_set1_curves_list means the same anyways
        cvt(boring_sys::SSL_set1_curves_list(
            ssl.as_ptr(),
            groups.as_ptr(),
        ))?;
    }
    Ok(())
}

/// Set's whether a second keyshare to be sent in client hello when PQ is used.
///
/// Default is true. When `true`, the first PQ (if any) and none-PQ keyshares are sent.
/// When `false`, only the first configured keyshares are sent.
#[cfg(feature = "pq_use_second_keyshare")]
pub fn ssl_use_second_key_share(ssl: &mut SslRef, enabled: bool) {
    unsafe { boring_sys::SSL_use_second_keyshare(ssl.as_ptr(), enabled as _) }
}
#[cfg(not(feature = "pq_use_second_keyshare"))]
pub fn ssl_use_second_key_share(_ssl: &mut SslRef, _enabled: bool) {}

/// Clear the error stack
///
/// SSL calls should check and clear the BoringSSL error stack. But some calls fail to do so.
/// This causes the next unrelated SSL call to fail due to the leftover errors. This function allows
/// the caller to clear the error stack before performing SSL calls to avoid this issue.
pub fn clear_error_stack() {
    let _ = ErrorStack::get();
}

/// Create a new [Ssl] from &[SslAcceptor]
///
/// This function is needed because [Ssl::new()] doesn't take `&SslContextRef` like openssl-rs
pub fn ssl_from_acceptor(acceptor: &SslAcceptor) -> Result<Ssl, ErrorStack> {
    Ssl::new_from_ref(acceptor.context())
}

/// Suspend the TLS handshake when a certificate is needed.
///
/// This function will cause tls handshake to pause and return the error: SSL_ERROR_WANT_X509_LOOKUP.
/// The caller should set the certificate and then call [unblock_ssl_cert()] before continue the
/// handshake on the tls connection.
pub fn suspend_when_need_ssl_cert(ssl: &mut SslRef) {
    unsafe {
        boring_sys::SSL_set_cert_cb(ssl.as_ptr(), Some(raw_cert_block), std::ptr::null_mut());
    }
}

/// Unblock a TLS handshake after the certificate is set.
///
/// The user should continue to call tls handshake after this function is called.
pub fn unblock_ssl_cert(ssl: &mut SslRef) {
    unsafe {
        boring_sys::SSL_set_cert_cb(ssl.as_ptr(), None, std::ptr::null_mut());
    }
}

// Just block the handshake
extern "C" fn raw_cert_block(_ssl: *mut boring_sys::SSL, _arg: *mut c_void) -> c_int {
    -1
}

/// Whether the TLS error is SSL_ERROR_WANT_X509_LOOKUP
pub fn is_suspended_for_cert(error: &boring::ssl::Error) -> bool {
    error.code().as_raw() == boring_sys::SSL_ERROR_WANT_X509_LOOKUP
}

#[allow(clippy::mut_from_ref)]
/// Get a mutable SslRef ouf of SslRef. which is a missing functionality for certain SslStream
/// # Safety
/// the caller needs to make sure that they hold a &mut SslRef
pub unsafe fn ssl_mut(ssl: &SslRef) -> &mut SslRef {
    unsafe { SslRef::from_ptr_mut(ssl.as_ptr()) }
}


================================================
FILE: pingora-boringssl/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The BoringSSL API compatibility layer.
//!
//! This crate aims at making [boring] APIs exchangeable with [openssl-rs](https://docs.rs/openssl/latest/openssl/).
//! In other words, this crate and [`pingora-openssl`](https://docs.rs/pingora-openssl) expose identical rust APIs.

#![warn(clippy::all)]

use boring as ssl_lib;
pub use boring_sys as ssl_sys;
pub mod boring_tokio;
pub use boring_tokio as tokio_ssl;
pub mod ext;

// export commonly used libs
pub use ssl_lib::error;
pub use ssl_lib::hash;
pub use ssl_lib::nid;
pub use ssl_lib::pkey;
pub use ssl_lib::ssl;
pub use ssl_lib::x509;


================================================
FILE: pingora-cache/Cargo.toml
================================================
[package]
name = "pingora-cache"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
rust-version = "1.84"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "http", "cache"]
description = """
HTTP caching APIs for Pingora proxy.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_cache"
path = "src/lib.rs"

[dependencies]
pingora-core = { version = "0.8.0", path = "../pingora-core", default-features = false }
pingora-error = { version = "0.8.0", path = "../pingora-error" }
pingora-header-serde = { version = "0.8.0", path = "../pingora-header-serde" }
pingora-http = { version = "0.8.0", path = "../pingora-http" }
pingora-lru = { version = "0.8.0", path = "../pingora-lru" }
pingora-timeout = { version = "0.8.0", path = "../pingora-timeout" }
bstr = { workspace = true }
http = { workspace = true }
indexmap = "1"
once_cell = { workspace = true }
regex = "1"
blake2 = "0.10"
serde = { version = "1.0", features = ["derive"] }
rmp-serde = "1.3.0"
bytes = { workspace = true }
httpdate = "1.0.2"
log = { workspace = true }
async-trait = { workspace = true }
parking_lot = "0.12"
cf-rustracing = "1.0"
cf-rustracing-jaeger = "1.0"
rmp = "0.8.14"
tokio = { workspace = true }
lru = { workspace = true }
ahash = { workspace = true }
hex = "0.4"
httparse = { workspace = true }
strum = { version = "0.26", features = ["derive"] }
rand = "0.8"

[dev-dependencies]
tokio-test = "0.4"
tokio = { workspace = true, features = ["fs"] }
env_logger = "0.11"
dhat = "0"
futures = "0.3"

[[bench]]
name = "simple_lru_memory"
harness = false

[[bench]]
name = "lru_memory"
harness = false

[[bench]]
name = "lru_serde"
harness = false

[features]
default = []
openssl = ["pingora-core/openssl"]
boringssl = ["pingora-core/boringssl"]
rustls = ["pingora-core/rustls"]
s2n = ["pingora-core/s2n"]


================================================
FILE: pingora-cache/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-cache/benches/lru_memory.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

use pingora_cache::{
    eviction::{lru::Manager, EvictionManager},
    CacheKey,
};

const ITEMS: usize = 5 * usize::pow(2, 20);

/*
    Total:     681,836,456 bytes (100%, 28,192,797.16/s) in 10,485,845 blocks (100%, 433,572.15/s), avg size 65.02 bytes, avg lifetime 5,935,075.17 µs (24.54% of program duration)
    At t-gmax: 569,114,536 bytes (100%) in 5,242,947 blocks (100%), avg size 108.55 bytes
    At t-end:  88 bytes (100%) in 3 blocks (100%), avg size 29.33 bytes
    Allocated at {
      #0: [root]
    }
  ├── PP 1.1/5 {
  │     Total:     293,601,280 bytes (43.06%, 12,139,921.91/s) in 5,242,880 blocks (50%, 216,784.32/s), avg size 56 bytes, avg lifetime 11,870,032.65 µs (49.08% of program duration)
  │     Max:       293,601,280 bytes in 5,242,880 blocks, avg size 56 bytes
  │     At t-gmax: 293,601,280 bytes (51.59%) in 5,242,880 blocks (100%), avg size 56 bytes
  │     At t-end:  0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
  │     Allocated at {
  │       #1: 0x5555703cf69c: alloc::alloc::exchange_malloc (alloc/src/alloc.rs:326:11)
  │       #2: 0x5555703cf69c: alloc::boxed::Box<T>::new (alloc/src/boxed.rs:215:9)
  │       #3: 0x5555703cf69c: pingora_lru::LruUnit<T>::admit (pingora-lru/src/lib.rs:201:20)
  │       #4: 0x5555703cf69c: pingora_lru::Lru<T,_>::admit (pingora-lru/src/lib.rs:48:26)
  │       #5: 0x5555703cf69c: <pingora_cache::eviction::lru::Manager<_> as pingora_cache::eviction::EvictionManager>::admit (src/eviction/lru.rs:114:9)
  │       #6: 0x5555703cf69c: lru_memory::main (pingora-cache/benches/lru_memory.rs:78:9)
  │     }
  │   }
  ├── PP 1.2/5 {
  │     Total:     203,685,456 bytes (29.87%, 8,422,052.97/s) in 50 blocks (0%, 2.07/s), avg size 4,073,709.12 bytes, avg lifetime 6,842,528.74 µs (28.29% of program duration)
  │     Max:       132,906,576 bytes in 32 blocks, avg size 4,153,330.5 bytes
  │     At t-gmax: 132,906,576 bytes (23.35%) in 32 blocks (0%), avg size 4,153,330.5 bytes
  │     At t-end:  0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
  │     Allocated at {
  │       #1: 0x5555703cec54: <alloc::alloc::Global as core::alloc::Allocator>::allocate (alloc/src/alloc.rs:237:9)
  │       #2: 0x5555703cec54: alloc::raw_vec::RawVec<T,A>::allocate_in (alloc/src/raw_vec.rs:185:45)
  │       #3: 0x5555703cec54: alloc::raw_vec::RawVec<T,A>::with_capacity_in (alloc/src/raw_vec.rs:131:9)
  │       #4: 0x5555703cec54: alloc::vec::Vec<T,A>::with_capacity_in (src/vec/mod.rs:641:20)
  │       #5: 0x5555703cec54: alloc::vec::Vec<T>::with_capacity (src/vec/mod.rs:483:9)
  │       #6: 0x5555703cec54: pingora_lru::linked_list::Nodes::with_capacity (pingora-lru/src/linked_list.rs:50:25)
  │       #7: 0x5555703cec54: pingora_lru::linked_list::LinkedList::with_capacity (pingora-lru/src/linked_list.rs:121:20)
  │       #8: 0x5555703cec54: pingora_lru::LruUnit<T>::with_capacity (pingora-lru/src/lib.rs:176:20)
  │       #9: 0x5555703cec54: pingora_lru::Lru<T,_>::with_capacity (pingora-lru/src/lib.rs:28:36)
  │       #10: 0x5555703cec54: pingora_cache::eviction::lru::Manager<_>::with_capacity (src/eviction/lru.rs:22:17)
  │       #11: 0x5555703cec54: lru_memory::main (pingora-cache/benches/lru_memory.rs:74:19)
  │     }
  │   }
  ├── PP 1.3/5 {
  │     Total:     142,606,592 bytes (20.92%, 5,896,544.09/s) in 32 blocks (0%, 1.32/s), avg size 4,456,456 bytes, avg lifetime 22,056,252.88 µs (91.2% of program duration)
  │     Max:       142,606,592 bytes in 32 blocks, avg size 4,456,456 bytes
  │     At t-gmax: 142,606,592 bytes (25.06%) in 32 blocks (0%), avg size 4,456,456 bytes
  │     At t-end:  0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
  │     Allocated at {
  │       #1: 0x5555703ceb64: alloc::alloc::alloc (alloc/src/alloc.rs:95:14)
  │       #2: 0x5555703ceb64: <hashbrown::raw::alloc::inner::Global as hashbrown::raw::alloc::inner::Allocator>::allocate (src/raw/alloc.rs:47:35)
  │       #3: 0x5555703ceb64: hashbrown::raw::alloc::inner::do_alloc (src/raw/alloc.rs:62:9)
  │       #4: 0x5555703ceb64: hashbrown::raw::RawTableInner<A>::new_uninitialized (src/raw/mod.rs:1080:38)
  │       #5: 0x5555703ceb64: hashbrown::raw::RawTableInner<A>::fallible_with_capacity (src/raw/mod.rs:1109:30)
  │       #6: 0x5555703ceb64: hashbrown::raw::RawTable<T,A>::fallible_with_capacity (src/raw/mod.rs:460:20)
  │       #7: 0x5555703ceb64: hashbrown::raw::RawTable<T,A>::with_capacity_in (src/raw/mod.rs:481:15)
  │       #8: 0x5555703ceb64: hashbrown::raw::RawTable<T>::with_capacity (src/raw/mod.rs:411:9)
  │       #9: 0x5555703ceb64: hashbrown::map::HashMap<K,V,S>::with_capacity_and_hasher (hashbrown-0.12.3/src/map.rs:422:20)
  │       #10: 0x5555703ceb64: hashbrown::map::HashMap<K,V>::with_capacity (hashbrown-0.12.3/src/map.rs:326:9)
  │       #11: 0x5555703ceb64: pingora_lru::LruUnit<T>::with_capacity (pingora-lru/src/lib.rs:175:27)
  │       #12: 0x5555703ceb64: pingora_lru::Lru<T,_>::with_capacity (pingora-lru/src/lib.rs:28:36)
  │       #13: 0x5555703ceb64: pingora_cache::eviction::lru::Manager<_>::with_capacity (src/eviction/lru.rs:22:17)
  │       #14: 0x5555703ceb64: lru_memory::main (pingora-cache/benches/lru_memory.rs:74:19)
  │     }
  │   }
*/
fn main() {
    let _profiler = dhat::Profiler::new_heap();
    let manager = Manager::<32>::with_capacity(ITEMS, ITEMS / 32);
    let unused_ttl = std::time::SystemTime::now();
    for i in 0..ITEMS {
        let item = CacheKey::new("", i.to_string(), "").to_compact();
        manager.admit(item, 1, unused_ttl);
    }
}


================================================
FILE: pingora-cache/benches/lru_serde.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::time::Instant;

use pingora_cache::{
    eviction::{lru::Manager, EvictionManager},
    CacheKey,
};

const ITEMS: usize = 5 * usize::pow(2, 20);

fn main() {
    let manager = Manager::<32>::with_capacity(ITEMS, ITEMS / 32);
    let manager2 = Manager::<32>::with_capacity(ITEMS, ITEMS / 32);
    let unused_ttl = std::time::SystemTime::now();
    for i in 0..ITEMS {
        let item = CacheKey::new("", i.to_string(), "").to_compact();
        manager.admit(item, 1, unused_ttl);
    }

    /* lru serialize shard 19 22.573338ms, 5241623 bytes
     * lru deserialize shard 19 39.260669ms, 5241623 bytes */
    for i in 0..32 {
        let before = Instant::now();
        let ser = manager.serialize_shard(i).unwrap();
        let elapsed = before.elapsed();
        println!("lru serialize shard {i} {elapsed:?}, {} bytes", ser.len());

        let before = Instant::now();
        manager2.deserialize_shard(&ser).unwrap();
        let elapsed = before.elapsed();
        println!("lru deserialize shard {i} {elapsed:?}, {} bytes", ser.len());
    }
}


================================================
FILE: pingora-cache/benches/simple_lru_memory.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

use pingora_cache::{
    eviction::{simple_lru::Manager, EvictionManager},
    CacheKey,
};

const ITEMS: usize = 5 * usize::pow(2, 20);

/*
   Total:     704,643,412 bytes (100%, 29,014,058.85/s) in 10,485,787 blocks (100%, 431,757.73/s), avg size 67.2 bytes, avg lifetime 6,163,799.09 µs (25.38% of program duration)
   At t-gmax: 520,093,936 bytes (100%) in 5,242,886 blocks (100%), avg size 99.2 bytes
  ├── PP 1.1/4 {
  │     Total:     377,487,360 bytes (53.57%, 15,543,238.31/s) in 5,242,880 blocks (50%, 215,878.31/s), avg size 72 bytes, avg lifetime 12,327,602.83 µs (50.76% of program duration)
  │     Max:       377,487,360 bytes in 5,242,880 blocks, avg size 72 bytes
  │     At t-gmax: 377,487,360 bytes (72.58%) in 5,242,880 blocks (100%), avg size 72 bytes
  │     At t-end:  0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
  │     Allocated at {
  │       #1: 0x5555791dd7e0: alloc::alloc::exchange_malloc (alloc/src/alloc.rs:326:11)
  │       #2: 0x5555791dd7e0: alloc::boxed::Box<T>::new (alloc/src/boxed.rs:215:9)
  │       #3: 0x5555791dd7e0: lru::LruCache<K,V,S>::replace_or_create_node (lru-0.8.1/src/lib.rs:391:20)
  │       #4: 0x5555791dd7e0: lru::LruCache<K,V,S>::capturing_put (lru-0.8.1/src/lib.rs:355:44)
  │       #5: 0x5555791dd7e0: lru::LruCache<K,V,S>::push (lru-0.8.1/src/lib.rs:334:9)
  │       #6: 0x5555791dd7e0: pingora_cache::eviction::simple_lru::Manager::insert (src/eviction/simple_lru.rs:49:23)
  │       #7: 0x5555791dd7e0: <pingora_cache::eviction::simple_lru::Manager as pingora_cache::eviction::EvictionManager>::admit (src/eviction/simple_lru.rs:166:9)
  │       #8: 0x5555791dd7e0: simple_lru_memory::main (pingora-cache/benches/simple_lru_memory.rs:21:9)
  │     }
  │   }
  ├── PP 1.2/4 {
  │     Total:     285,212,780 bytes (40.48%, 11,743,784.5/s) in 22 blocks (0%, 0.91/s), avg size 12,964,217.27 bytes, avg lifetime 1,116,774.23 µs (4.6% of program duration)
  │     Max:       213,909,520 bytes in 2 blocks, avg size 106,954,760 bytes
  │     At t-gmax: 142,606,344 bytes (27.42%) in 1 blocks (0%), avg size 142,606,344 bytes
  │     At t-end:  0 bytes (0%) in 0 blocks (0%), avg size 0 bytes
  │     Allocated at {
  │       #1: 0x5555791dae20: alloc::alloc::alloc (alloc/src/alloc.rs:95:14)
  │       #2: 0x5555791dae20: <hashbrown::raw::alloc::inner::Global as hashbrown::raw::alloc::inner::Allocator>::allocate (src/raw/alloc.rs:47:35)
  │       #3: 0x5555791dae20: hashbrown::raw::alloc::inner::do_alloc (src/raw/alloc.rs:62:9)
  │       #4: 0x5555791dae20: hashbrown::raw::RawTableInner<A>::new_uninitialized (src/raw/mod.rs:1080:38)
  │       #5: 0x5555791dae20: hashbrown::raw::RawTableInner<A>::fallible_with_capacity (src/raw/mod.rs:1109:30)
  │       #6: 0x5555791dae20: hashbrown::raw::RawTableInner<A>::prepare_resize (src/raw/mod.rs:1353:29)
  │       #7: 0x5555791dae20: hashbrown::raw::RawTableInner<A>::resize_inner (src/raw/mod.rs:1426:29)
  │       #8: 0x5555791dae20: hashbrown::raw::RawTableInner<A>::reserve_rehash_inner (src/raw/mod.rs:1403:13)
  │       #9: 0x5555791dae20: hashbrown::raw::RawTable<T,A>::reserve_rehash (src/raw/mod.rs:680:13)
  │       #10: 0x5555791dde50: hashbrown::raw::RawTable<T,A>::reserve (src/raw/mod.rs:646:16)
  │       #11: 0x5555791dde50: hashbrown::raw::RawTable<T,A>::insert (src/raw/mod.rs:725:17)
  │       #12: 0x5555791dde50: hashbrown::map::HashMap<K,V,S,A>::insert (hashbrown-0.12.3/src/map.rs:1679:13)
  │       #13: 0x5555791dde50: lru::LruCache<K,V,S>::capturing_put (lru-0.8.1/src/lib.rs:361:17)
  │       #14: 0x5555791dde50: lru::LruCache<K,V,S>::push (lru-0.8.1/src/lib.rs:334:9)
  │       #15: 0x5555791dde50: pingora_cache::eviction::simple_lru::Manager::insert (src/eviction/simple_lru.rs:49:23)
  │       #16: 0x5555791dde50: <pingora_cache::eviction::simple_lru::Manager as pingora_cache::eviction::EvictionManager>::admit (src/eviction/simple_lru.rs:166:9)
  │       #17: 0x5555791dde50: simple_lru_memory::main (pingora-cache/benches/simple_lru_memory.rs:21:9)
  │     }
  │   }
*/
fn main() {
    let _profiler = dhat::Profiler::new_heap();
    let manager = Manager::new(ITEMS);
    let unused_ttl = std::time::SystemTime::now();
    for i in 0..ITEMS {
        let item = CacheKey::new("", i.to_string(), "").to_compact();
        manager.admit(item, 1, unused_ttl);
    }
}


================================================
FILE: pingora-cache/src/cache_control.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Functions and utilities to help parse Cache-Control headers

use super::*;

use http::header::HeaderName;
use http::HeaderValue;
use indexmap::IndexMap;
use once_cell::sync::Lazy;
use pingora_error::{Error, ErrorType};
use regex::bytes::Regex;
use std::num::IntErrorKind;
use std::slice;
use std::str;

/// The max delta-second per [RFC 9111](https://datatracker.ietf.org/doc/html/rfc9111#section-1.2.2)
// "If a cache receives a delta-seconds value
// greater than the greatest integer it can represent, or if any of its
// subsequent calculations overflows, the cache MUST consider the value
// to be 2147483648 (2^31) or the greatest positive integer it can
// conveniently represent.
//
//    |  *Note:* The value 2147483648 is here for historical reasons,
//    |  represents infinity (over 68 years), and does not need to be
//    |  stored in binary form; an implementation could produce it as a
//    |  string if any overflow occurs, even if the calculations are
//    |  performed with an arithmetic type incapable of directly
//    |  representing that number.  What matters here is that an
//    |  overflow be detected and not treated as a negative value in
//    |  later calculations."
//
// We choose to use i32::MAX for our overflow value to stick to the letter of the RFC.
pub const DELTA_SECONDS_OVERFLOW_VALUE: u32 = i32::MAX as u32;
pub const DELTA_SECONDS_OVERFLOW_DURATION: Duration =
    Duration::from_secs(DELTA_SECONDS_OVERFLOW_VALUE as u64);

/// Cache control directive key type
pub type DirectiveKey = String;

/// Cache control directive value type
#[derive(Debug)]
pub struct DirectiveValue(pub Vec<u8>);

impl AsRef<[u8]> for DirectiveValue {
    fn as_ref(&self) -> &[u8] {
        &self.0
    }
}

impl DirectiveValue {
    /// A [DirectiveValue] without quotes (`"`).
    pub fn parse_as_bytes(&self) -> &[u8] {
        self.0
            .strip_prefix(b"\"")
            .and_then(|bytes| bytes.strip_suffix(b"\""))
            .unwrap_or(&self.0[..])
    }

    /// A [DirectiveValue] without quotes (`"`) as `str`.
    pub fn parse_as_str(&self) -> Result<&str> {
        str::from_utf8(self.parse_as_bytes()).or_else(|e| {
            Error::e_because(ErrorType::InternalError, "could not parse value as utf8", e)
        })
    }

    /// Parse the [DirectiveValue] as delta seconds
    ///
    /// `"`s are ignored. The value is capped to [DELTA_SECONDS_OVERFLOW_VALUE].
    pub fn parse_as_delta_seconds(&self) -> Result<u32> {
        match self.parse_as_str()?.parse::<u32>() {
            Ok(value) => Ok(value),
            Err(e) => {
                // delta-seconds expect to handle positive overflow gracefully
                if e.kind() == &IntErrorKind::PosOverflow {
                    Ok(DELTA_SECONDS_OVERFLOW_VALUE)
                } else {
                    Error::e_because(ErrorType::InternalError, "could not parse value as u32", e)
                }
            }
        }
    }
}

/// An ordered map to store cache control key value pairs.
pub type DirectiveMap = IndexMap<DirectiveKey, Option<DirectiveValue>>;

/// Parsed Cache-Control directives
#[derive(Debug)]
pub struct CacheControl {
    /// The parsed directives
    pub directives: DirectiveMap,
}

/// Cacheability calculated from cache control.
#[derive(Debug, PartialEq, Eq)]
pub enum Cacheable {
    /// Cacheable
    Yes,
    /// Not cacheable
    No,
    /// No directive found for explicit cacheability
    Default,
}

/// An iter over all the cache control directives
pub struct ListValueIter<'a>(slice::Split<'a, u8, fn(&u8) -> bool>);

impl<'a> ListValueIter<'a> {
    pub fn from(value: &'a DirectiveValue) -> Self {
        ListValueIter(value.parse_as_bytes().split(|byte| byte == &b','))
    }
}

// https://datatracker.ietf.org/doc/html/rfc9110#name-whitespace
// optional whitespace OWS = *(SP / HTAB); SP = 0x20, HTAB = 0x09
fn trim_ows(bytes: &[u8]) -> &[u8] {
    fn not_ows(b: &u8) -> bool {
        b != &b'\x20' && b != &b'\x09'
    }
    // find first non-OWS char from front (head) and from end (tail)
    let head = bytes.iter().position(not_ows).unwrap_or(0);
    let tail = bytes
        .iter()
        .rposition(not_ows)
        .map(|rpos| rpos + 1)
        .unwrap_or(head);
    &bytes[head..tail]
}

impl<'a> Iterator for ListValueIter<'a> {
    type Item = &'a [u8];

    fn next(&mut self) -> Option<Self::Item> {
        Some(trim_ows(self.0.next()?))
    }
}

// Originally from https://github.com/hapijs/wreck which has the following comments:
// Cache-Control   = 1#cache-directive
// cache-directive = token [ "=" ( token / quoted-string ) ]
// token           = [^\x00-\x20\(\)<>@\,;\:\\"\/\[\]\?\=\{\}\x7F]+
// quoted-string   = "(?:[^"\\]|\\.)*"
//
// note the `token` implementation excludes disallowed ASCII ranges
// and disallowed delimiters: https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2
// though it does not forbid `obs-text`: %x80-FF
static RE_CACHE_DIRECTIVE: Lazy<Regex> =
    // to break our version down further:
    // `(?-u)`: unicode support disabled, which puts the regex into "ASCII compatible mode" for specifying literal bytes like \x7F: https://docs.rs/regex/1.10.4/regex/bytes/index.html#syntax
    // `(?:^|(?:\s*[,;]\s*)`: allow either , or ; as a delimiter
    // `([^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+)`: token (directive name capture group)
    // `(?:=((?:[^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+|(?:"(?:[^"\\]|\\.)*"))))`: token OR quoted-string (directive value capture-group)
    Lazy::new(|| {
        Regex::new(r#"(?-u)(?:^|(?:\s*[,;]\s*))([^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+)(?:=((?:[^\x00-\x20\(\)<>@,;:\\"/\[\]\?=\{\}\x7F]+|(?:"(?:[^"\\]|\\.)*"))))?"#).unwrap()
    });

impl CacheControl {
    // Our parsing strategy is more permissive than the RFC in a few ways:
    // - Allows semicolons as delimiters (in addition to commas). See the regex above.
    // - Allows octets outside of visible ASCII in `token`s, and in later RFCs, octets outside of
    //   the `quoted-string` range: https://datatracker.ietf.org/doc/html/rfc9110#section-5.6.2
    //   See the regex above.
    // - Doesn't require no-value for "boolean directives," such as must-revalidate
    // - Allows quoted-string format for numeric values.
    fn from_headers(headers: http::header::GetAll<HeaderValue>) -> Option<Self> {
        let mut directives = IndexMap::new();
        // should iterate in header line insertion order
        for line in headers {
            for captures in RE_CACHE_DIRECTIVE.captures_iter(line.as_bytes()) {
                // directive key
                // header values don't have to be utf-8, but we store keys as strings for case-insensitive hashing
                let key = captures.get(1).and_then(|cap| {
                    str::from_utf8(cap.as_bytes())
                        .ok()
                        .map(|token| token.to_lowercase())
                });
                if key.is_none() {
                    continue;
                }
                // directive value
                // match token or quoted-string
                let value = captures
                    .get(2)
                    .map(|cap| DirectiveValue(cap.as_bytes().to_vec()));
                directives.insert(key.unwrap(), value);
            }
        }
        Some(CacheControl { directives })
    }

    /// Parse from the given header name in `headers`
    pub fn from_headers_named(header_name: &str, headers: &http::HeaderMap) -> Option<Self> {
        if !headers.contains_key(header_name) {
            return None;
        }

        Self::from_headers(headers.get_all(header_name))
    }

    /// Parse from the given header name in the [ReqHeader]
    pub fn from_req_headers_named(header_name: &str, req_header: &ReqHeader) -> Option<Self> {
        Self::from_headers_named(header_name, &req_header.headers)
    }

    /// Parse `Cache-Control` header name from the [ReqHeader]
    pub fn from_req_headers(req_header: &ReqHeader) -> Option<Self> {
        Self::from_req_headers_named("cache-control", req_header)
    }

    /// Parse from the given header name in the [RespHeader]
    pub fn from_resp_headers_named(header_name: &str, resp_header: &RespHeader) -> Option<Self> {
        Self::from_headers_named(header_name, &resp_header.headers)
    }

    /// Parse `Cache-Control` header name from the [RespHeader]
    pub fn from_resp_headers(resp_header: &RespHeader) -> Option<Self> {
        Self::from_resp_headers_named("cache-control", resp_header)
    }

    /// Whether the given directive is in the cache control.
    pub fn has_key(&self, key: &str) -> bool {
        self.directives.contains_key(key)
    }

    /// Whether the `public` directive is in the cache control.
    pub fn public(&self) -> bool {
        self.has_key("public")
    }

    /// Whether the given directive exists, and it has no value.
    fn has_key_without_value(&self, key: &str) -> bool {
        matches!(self.directives.get(key), Some(None))
    }

    /// Whether the standalone `private` exists in the cache control
    // RFC 7234: using the #field-name versions of `private`
    // means a shared cache "MUST NOT store the specified field-name(s),
    // whereas it MAY store the remainder of the response."
    // It must be a boolean form (no value) to apply to the whole response.
    // https://datatracker.ietf.org/doc/html/rfc7234#section-5.2.2.6
    pub fn private(&self) -> bool {
        self.has_key_without_value("private")
    }

    fn get_field_names(&self, key: &str) -> Option<ListValueIter<'_>> {
        let value = self.directives.get(key)?.as_ref()?;
        Some(ListValueIter::from(value))
    }

    /// Get the values of `private=`
    pub fn private_field_names(&self) -> Option<ListValueIter<'_>> {
        self.get_field_names("private")
    }

    /// Whether the standalone `no-cache` exists in the cache control
    pub fn no_cache(&self) -> bool {
        self.has_key_without_value("no-cache")
    }

    /// Get the values of `no-cache=`
    pub fn no_cache_field_names(&self) -> Option<ListValueIter<'_>> {
        self.get_field_names("no-cache")
    }

    /// Whether `no-store` exists.
    pub fn no_store(&self) -> bool {
        self.has_key("no-store")
    }

    fn parse_delta_seconds(&self, key: &str) -> Result<Option<u32>> {
        if let Some(Some(dir_value)) = self.directives.get(key) {
            Ok(Some(dir_value.parse_as_delta_seconds()?))
        } else {
            Ok(None)
        }
    }

    /// Return the `max-age` seconds
    pub fn max_age(&self) -> Result<Option<u32>> {
        self.parse_delta_seconds("max-age")
    }

    /// Return the `s-maxage` seconds
    pub fn s_maxage(&self) -> Result<Option<u32>> {
        self.parse_delta_seconds("s-maxage")
    }

    /// Return the `stale-while-revalidate` seconds
    pub fn stale_while_revalidate(&self) -> Result<Option<u32>> {
        self.parse_delta_seconds("stale-while-revalidate")
    }

    /// Return the `stale-if-error` seconds
    pub fn stale_if_error(&self) -> Result<Option<u32>> {
        self.parse_delta_seconds("stale-if-error")
    }

    /// Whether `must-revalidate` exists.
    pub fn must_revalidate(&self) -> bool {
        self.has_key("must-revalidate")
    }

    /// Whether `proxy-revalidate` exists.
    pub fn proxy_revalidate(&self) -> bool {
        self.has_key("proxy-revalidate")
    }

    /// Whether `only-if-cached` exists.
    pub fn only_if_cached(&self) -> bool {
        self.has_key("only-if-cached")
    }
}

impl InterpretCacheControl for CacheControl {
    fn is_cacheable(&self) -> Cacheable {
        if self.no_store() || self.private() {
            return Cacheable::No;
        }
        if self.has_key("s-maxage") || self.has_key("max-age") || self.public() {
            return Cacheable::Yes;
        }
        Cacheable::Default
    }

    fn allow_caching_authorized_req(&self) -> bool {
        // RFC 7234 https://datatracker.ietf.org/doc/html/rfc7234#section-3
        // "MUST NOT" store requests with Authorization header
        // unless response contains one of these directives
        self.must_revalidate() || self.public() || self.has_key("s-maxage")
    }

    fn fresh_duration(&self) -> Option<Duration> {
        if self.no_cache() {
            // always treated as stale
            return Some(Duration::ZERO);
        }
        let seconds = self
            .s_maxage()
            .ok()?
            // s-maxage not present
            .or_else(|| self.max_age().unwrap_or(None))
            .map(|duration| Duration::from_secs(duration as u64))?;
        Some(seconds)
    }

    fn serve_stale_while_revalidate_duration(&self) -> Option<Duration> {
        // RFC 7234: these directives forbid serving stale.
        // https://datatracker.ietf.org/doc/html/rfc7234#section-4.2.4
        if self.must_revalidate() || self.proxy_revalidate() || self.has_key("s-maxage") {
            return Some(Duration::ZERO);
        }
        self.stale_while_revalidate()
            .unwrap_or(None)
            .map(|secs| Duration::from_secs(secs as u64))
    }

    fn serve_stale_if_error_duration(&self) -> Option<Duration> {
        if self.must_revalidate() || self.proxy_revalidate() || self.has_key("s-maxage") {
            return Some(Duration::ZERO);
        }
        self.stale_if_error()
            .unwrap_or(None)
            .map(|secs| Duration::from_secs(secs as u64))
    }

    // Strip header names listed in `private` or `no-cache` directives from a response.
    fn strip_private_headers(&self, resp_header: &mut ResponseHeader) {
        fn strip_listed_headers(resp: &mut ResponseHeader, field_names: ListValueIter) {
            for name in field_names {
                if let Ok(header) = HeaderName::from_bytes(name) {
                    resp.remove_header(&header);
                }
            }
        }

        if let Some(headers) = self.private_field_names() {
            strip_listed_headers(resp_header, headers);
        }
        // We interpret `no-cache` the same way as `private`,
        // though technically it has a less restrictive requirement
        // ("MUST NOT be sent in the response to a subsequent request
        // without successful revalidation with the origin server").
        // https://datatracker.ietf.org/doc/html/rfc7234#section-5.2.2.2
        if let Some(headers) = self.no_cache_field_names() {
            strip_listed_headers(resp_header, headers);
        }
    }
}

/// `InterpretCacheControl` provides a meaningful interface to the parsed `CacheControl`.
/// These functions actually interpret the parsed cache-control directives to return
/// the freshness or other cache meta values that cache-control is signaling.
///
/// By default `CacheControl` implements an RFC-7234 compliant reading that assumes it is being
/// used with a shared (proxy) cache.
pub trait InterpretCacheControl {
    /// Does cache-control specify this response is cacheable?
    ///
    /// Note that an RFC-7234 compliant cacheability check must also
    /// check if the request contained the Authorization header and
    /// `allow_caching_authorized_req`.
    fn is_cacheable(&self) -> Cacheable;

    /// Does this cache-control allow caching a response to
    /// a request with the Authorization header?
    fn allow_caching_authorized_req(&self) -> bool;

    /// Returns freshness ttl specified in cache-control
    ///
    /// - `Some(_)` indicates cache-control specifies a valid ttl. Some(Duration::ZERO) = always stale.
    /// - `None` means cache-control did not specify a valid ttl.
    fn fresh_duration(&self) -> Option<Duration>;

    /// Returns stale-while-revalidate ttl,
    ///
    /// The result should consider all the relevant cache directives, not just SWR header itself.
    ///
    /// Some(0) means serving such stale is disallowed by directive like `must-revalidate`
    /// or `stale-while-revalidater=0`.
    ///
    /// `None` indicates no SWR ttl was specified.
    fn serve_stale_while_revalidate_duration(&self) -> Option<Duration>;

    /// Returns stale-if-error ttl,
    ///
    /// The result should consider all the relevant cache directives, not just SIE header itself.
    ///
    /// Some(0) means serving such stale is disallowed by directive like `must-revalidate`
    /// or `stale-if-error=0`.
    ///
    /// `None` indicates no SIE ttl was specified.
    fn serve_stale_if_error_duration(&self) -> Option<Duration>;

    /// Strip header names listed in `private` or `no-cache` directives from a response,
    /// usually prior to storing that response in cache.
    fn strip_private_headers(&self, resp_header: &mut ResponseHeader);
}

#[cfg(test)]
mod tests {
    use super::*;
    use http::header::CACHE_CONTROL;
    use http::{request, response};

    fn build_response(cc_key: HeaderName, cc_value: &str) -> response::Parts {
        let (parts, _) = response::Builder::new()
            .header(cc_key, cc_value)
            .body(())
            .unwrap()
            .into_parts();
        parts
    }

    #[test]
    fn test_simple_cache_control() {
        let resp = build_response(CACHE_CONTROL, "public, max-age=10000");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(cc.public());
        assert_eq!(cc.max_age().unwrap().unwrap(), 10000);
    }

    #[test]
    fn test_private_cache_control() {
        let resp = build_response(CACHE_CONTROL, "private");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();

        assert!(cc.private());
        assert!(cc.max_age().unwrap().is_none());
    }

    #[test]
    fn test_directives_across_header_lines() {
        let (parts, _) = response::Builder::new()
            .header(CACHE_CONTROL, "public,")
            .header("cache-Control", "max-age=10000")
            .body(())
            .unwrap()
            .into_parts();
        let cc = CacheControl::from_resp_headers(&parts).unwrap();

        assert!(cc.public());
        assert_eq!(cc.max_age().unwrap().unwrap(), 10000);
    }

    #[test]
    fn test_recognizes_semicolons_as_delimiters() {
        let resp = build_response(CACHE_CONTROL, "public; max-age=0");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();

        assert!(cc.public());
        assert_eq!(cc.max_age().unwrap().unwrap(), 0);
    }

    #[test]
    fn test_unknown_directives() {
        let resp = build_response(CACHE_CONTROL, "public,random1=random2, rand3=\"\"");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        let mut directive_iter = cc.directives.iter();

        let first = directive_iter.next().unwrap();
        assert_eq!(first.0, &"public");
        assert!(first.1.is_none());

        let second = directive_iter.next().unwrap();
        assert_eq!(second.0, &"random1");
        assert_eq!(second.1.as_ref().unwrap().0, "random2".as_bytes());

        let third = directive_iter.next().unwrap();
        assert_eq!(third.0, &"rand3");
        assert_eq!(third.1.as_ref().unwrap().0, "\"\"".as_bytes());

        assert!(directive_iter.next().is_none());
    }

    #[test]
    fn test_case_insensitive_directive_keys() {
        let resp = build_response(
            CACHE_CONTROL,
            "Public=\"something\", mAx-AGe=\"10000\", foo=cRaZyCaSe, bAr=\"inQuotes\"",
        );
        let cc = CacheControl::from_resp_headers(&resp).unwrap();

        assert!(cc.public());
        assert_eq!(cc.max_age().unwrap().unwrap(), 10000);

        let mut directive_iter = cc.directives.iter();
        let first = directive_iter.next().unwrap();
        assert_eq!(first.0, &"public");
        assert_eq!(first.1.as_ref().unwrap().0, "\"something\"".as_bytes());

        let second = directive_iter.next().unwrap();
        assert_eq!(second.0, &"max-age");
        assert_eq!(second.1.as_ref().unwrap().0, "\"10000\"".as_bytes());

        // values are still stored with casing
        let third = directive_iter.next().unwrap();
        assert_eq!(third.0, &"foo");
        assert_eq!(third.1.as_ref().unwrap().0, "cRaZyCaSe".as_bytes());

        let fourth = directive_iter.next().unwrap();
        assert_eq!(fourth.0, &"bar");
        assert_eq!(fourth.1.as_ref().unwrap().0, "\"inQuotes\"".as_bytes());

        assert!(directive_iter.next().is_none());
    }

    #[test]
    fn test_non_ascii() {
        let resp = build_response(CACHE_CONTROL, "püblic=💖, max-age=\"💯\"");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();

        // Not considered valid registered directive keys / values
        assert!(!cc.public());
        assert_eq!(
            cc.max_age().unwrap_err().context.unwrap().to_string(),
            "could not parse value as u32"
        );

        let mut directive_iter = cc.directives.iter();
        let first = directive_iter.next().unwrap();
        assert_eq!(first.0, &"püblic");
        assert_eq!(first.1.as_ref().unwrap().0, "💖".as_bytes());

        let second = directive_iter.next().unwrap();
        assert_eq!(second.0, &"max-age");
        assert_eq!(second.1.as_ref().unwrap().0, "\"💯\"".as_bytes());

        assert!(directive_iter.next().is_none());
    }

    #[test]
    fn test_non_utf8_key() {
        let mut resp = response::Builder::new().body(()).unwrap();
        resp.headers_mut().insert(
            CACHE_CONTROL,
            HeaderValue::from_bytes(b"bar\xFF=\"baz\", a=b").unwrap(),
        );
        let (parts, _) = resp.into_parts();
        let cc = CacheControl::from_resp_headers(&parts).unwrap();

        // invalid bytes for key
        let mut directive_iter = cc.directives.iter();
        let first = directive_iter.next().unwrap();
        assert_eq!(first.0, &"a");
        assert_eq!(first.1.as_ref().unwrap().0, "b".as_bytes());

        assert!(directive_iter.next().is_none());
    }

    #[test]
    fn test_non_utf8_value() {
        // RFC 7230: 0xFF is part of obs-text and is officially considered a valid octet in quoted-strings
        let mut resp = response::Builder::new().body(()).unwrap();
        resp.headers_mut().insert(
            CACHE_CONTROL,
            HeaderValue::from_bytes(b"max-age=ba\xFFr, bar=\"baz\xFF\", a=b").unwrap(),
        );
        let (parts, _) = resp.into_parts();
        let cc = CacheControl::from_resp_headers(&parts).unwrap();

        assert_eq!(
            cc.max_age().unwrap_err().context.unwrap().to_string(),
            "could not parse value as utf8"
        );

        let mut directive_iter = cc.directives.iter();

        let first = directive_iter.next().unwrap();
        assert_eq!(first.0, &"max-age");
        assert_eq!(first.1.as_ref().unwrap().0, b"ba\xFFr");

        let second = directive_iter.next().unwrap();
        assert_eq!(second.0, &"bar");
        assert_eq!(second.1.as_ref().unwrap().0, b"\"baz\xFF\"");

        let third = directive_iter.next().unwrap();
        assert_eq!(third.0, &"a");
        assert_eq!(third.1.as_ref().unwrap().0, "b".as_bytes());

        assert!(directive_iter.next().is_none());
    }

    #[test]
    fn test_age_overflow() {
        let resp = build_response(
            CACHE_CONTROL,
            "max-age=-99999999999999999999999999, s-maxage=99999999999999999999999999",
        );
        let cc = CacheControl::from_resp_headers(&resp).unwrap();

        assert_eq!(
            cc.s_maxage().unwrap().unwrap(),
            DELTA_SECONDS_OVERFLOW_VALUE
        );
        // negative ages still result in errors even with overflow handling
        assert_eq!(
            cc.max_age().unwrap_err().context.unwrap().to_string(),
            "could not parse value as u32"
        );
    }

    #[test]
    fn test_fresh_sec() {
        let resp = build_response(CACHE_CONTROL, "");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(cc.fresh_duration().is_none());

        let resp = build_response(CACHE_CONTROL, "max-age=12345");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.fresh_duration().unwrap(), Duration::from_secs(12345));

        let resp = build_response(CACHE_CONTROL, "max-age=99999,s-maxage=123");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        // prefer s-maxage over max-age
        assert_eq!(cc.fresh_duration().unwrap(), Duration::from_secs(123));
    }

    #[test]
    fn test_cacheability() {
        let resp = build_response(CACHE_CONTROL, "");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.is_cacheable(), Cacheable::Default);

        // uncacheable
        let resp = build_response(CACHE_CONTROL, "private, max-age=12345");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.is_cacheable(), Cacheable::No);

        let resp = build_response(CACHE_CONTROL, "no-store, max-age=12345");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.is_cacheable(), Cacheable::No);

        // cacheable
        let resp = build_response(CACHE_CONTROL, "public");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.is_cacheable(), Cacheable::Yes);

        let resp = build_response(CACHE_CONTROL, "max-age=0");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.is_cacheable(), Cacheable::Yes);
    }

    #[test]
    fn test_no_cache() {
        let resp = build_response(CACHE_CONTROL, "no-cache, max-age=12345");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.is_cacheable(), Cacheable::Yes);
        assert_eq!(cc.fresh_duration().unwrap(), Duration::ZERO);
    }

    #[test]
    fn test_no_cache_field_names() {
        let resp = build_response(CACHE_CONTROL, "no-cache=\"set-cookie\", max-age=12345");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(!cc.private());
        assert_eq!(cc.is_cacheable(), Cacheable::Yes);
        assert_eq!(cc.fresh_duration().unwrap(), Duration::from_secs(12345));
        let mut field_names = cc.no_cache_field_names().unwrap();
        assert_eq!(
            str::from_utf8(field_names.next().unwrap()).unwrap(),
            "set-cookie"
        );
        assert!(field_names.next().is_none());

        let mut resp = response::Builder::new().body(()).unwrap();
        resp.headers_mut().insert(
            CACHE_CONTROL,
            HeaderValue::from_bytes(
                b"private=\"\", no-cache=\"a\xFF, set-cookie, Baz\x09 , c,d  ,, \"",
            )
            .unwrap(),
        );
        let (parts, _) = resp.into_parts();
        let cc = CacheControl::from_resp_headers(&parts).unwrap();
        let mut field_names = cc.private_field_names().unwrap();
        assert_eq!(str::from_utf8(field_names.next().unwrap()).unwrap(), "");
        assert!(field_names.next().is_none());
        let mut field_names = cc.no_cache_field_names().unwrap();
        assert!(str::from_utf8(field_names.next().unwrap()).is_err());
        assert_eq!(
            str::from_utf8(field_names.next().unwrap()).unwrap(),
            "set-cookie"
        );
        assert_eq!(str::from_utf8(field_names.next().unwrap()).unwrap(), "Baz");
        assert_eq!(str::from_utf8(field_names.next().unwrap()).unwrap(), "c");
        assert_eq!(str::from_utf8(field_names.next().unwrap()).unwrap(), "d");
        assert_eq!(str::from_utf8(field_names.next().unwrap()).unwrap(), "");
        assert_eq!(str::from_utf8(field_names.next().unwrap()).unwrap(), "");
        assert!(field_names.next().is_none());
    }

    #[test]
    fn test_strip_private_headers() {
        let mut resp = ResponseHeader::build(200, None).unwrap();
        resp.append_header(
            CACHE_CONTROL,
            "no-cache=\"x-private-header\", max-age=12345",
        )
        .unwrap();
        resp.append_header("X-Private-Header", "dropped").unwrap();

        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        cc.strip_private_headers(&mut resp);
        assert!(!resp.headers.contains_key("X-Private-Header"));
    }

    #[test]
    fn test_stale_while_revalidate() {
        let resp = build_response(CACHE_CONTROL, "max-age=12345, stale-while-revalidate=5");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.stale_while_revalidate().unwrap().unwrap(), 5);
        assert_eq!(
            cc.serve_stale_while_revalidate_duration().unwrap(),
            Duration::from_secs(5)
        );
        assert!(cc.serve_stale_if_error_duration().is_none());
    }

    #[test]
    fn test_stale_if_error() {
        let resp = build_response(CACHE_CONTROL, "max-age=12345, stale-if-error=3600");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.stale_if_error().unwrap().unwrap(), 3600);
        assert_eq!(
            cc.serve_stale_if_error_duration().unwrap(),
            Duration::from_secs(3600)
        );
        assert!(cc.serve_stale_while_revalidate_duration().is_none());
    }

    #[test]
    fn test_must_revalidate() {
        let resp = build_response(
            CACHE_CONTROL,
            "max-age=12345, stale-while-revalidate=60, stale-if-error=30, must-revalidate",
        );
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(cc.must_revalidate());
        assert_eq!(cc.stale_while_revalidate().unwrap().unwrap(), 60);
        assert_eq!(cc.stale_if_error().unwrap().unwrap(), 30);
        assert_eq!(
            cc.serve_stale_while_revalidate_duration().unwrap(),
            Duration::ZERO
        );
        assert_eq!(cc.serve_stale_if_error_duration().unwrap(), Duration::ZERO);
    }

    #[test]
    fn test_proxy_revalidate() {
        let resp = build_response(
            CACHE_CONTROL,
            "max-age=12345, stale-while-revalidate=60, stale-if-error=30, proxy-revalidate",
        );
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(cc.proxy_revalidate());
        assert_eq!(cc.stale_while_revalidate().unwrap().unwrap(), 60);
        assert_eq!(cc.stale_if_error().unwrap().unwrap(), 30);
        assert_eq!(
            cc.serve_stale_while_revalidate_duration().unwrap(),
            Duration::ZERO
        );
        assert_eq!(cc.serve_stale_if_error_duration().unwrap(), Duration::ZERO);
    }

    #[test]
    fn test_s_maxage_stale() {
        let resp = build_response(
            CACHE_CONTROL,
            "s-maxage=0, stale-while-revalidate=60, stale-if-error=30",
        );
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert_eq!(cc.stale_while_revalidate().unwrap().unwrap(), 60);
        assert_eq!(cc.stale_if_error().unwrap().unwrap(), 30);
        assert_eq!(
            cc.serve_stale_while_revalidate_duration().unwrap(),
            Duration::ZERO
        );
        assert_eq!(cc.serve_stale_if_error_duration().unwrap(), Duration::ZERO);
    }

    #[test]
    fn test_authorized_request() {
        let resp = build_response(CACHE_CONTROL, "max-age=10");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(!cc.allow_caching_authorized_req());

        let resp = build_response(CACHE_CONTROL, "s-maxage=10");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(cc.allow_caching_authorized_req());

        let resp = build_response(CACHE_CONTROL, "public");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(cc.allow_caching_authorized_req());

        let resp = build_response(CACHE_CONTROL, "must-revalidate, max-age=0");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(cc.allow_caching_authorized_req());

        let resp = build_response(CACHE_CONTROL, "");
        let cc = CacheControl::from_resp_headers(&resp).unwrap();
        assert!(!cc.allow_caching_authorized_req());
    }

    fn build_request(cc_key: HeaderName, cc_value: &str) -> request::Parts {
        let (parts, _) = request::Builder::new()
            .header(cc_key, cc_value)
            .body(())
            .unwrap()
            .into_parts();
        parts
    }

    #[test]
    fn test_request_only_if_cached() {
        let req = build_request(CACHE_CONTROL, "only-if-cached=1");
        let cc = CacheControl::from_req_headers(&req).unwrap();
        assert!(cc.only_if_cached())
    }
}


================================================
FILE: pingora-cache/src/eviction/lru.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! A shared LRU cache manager

use super::EvictionManager;
use crate::key::CompactCacheKey;

use async_trait::async_trait;
use log::{info, warn};
use pingora_error::{BError, ErrorType::*, OrErr, Result};
use pingora_lru::Lru;
use rand::Rng;
use serde::de::SeqAccess;
use serde::{Deserialize, Serialize};
use std::fs::{rename, File};
use std::hash::{Hash, Hasher};
use std::io::prelude::*;
use std::path::Path;
use std::time::SystemTime;

/// A shared LRU cache manager designed to manage a large volume of assets.
///
/// - Space optimized in-memory LRU (see [pingora_lru]).
/// - Instead of a single giant LRU, this struct shards the assets into `N` independent LRUs.
///
/// This allows [EvictionManager::save()] not to lock the entire cache manager while performing
/// serialization.
pub struct Manager<const N: usize>(Lru<CompactCacheKey, N>);

#[derive(Debug, Serialize, Deserialize)]
struct SerdeHelperNode(CompactCacheKey, usize);

impl<const N: usize> Manager<N> {
    /// Create a [Manager] with the given size limit and estimated per shard capacity.
    ///
    /// The `capacity` is for preallocating to avoid reallocation cost when the LRU grows.
    pub fn with_capacity(limit: usize, capacity: usize) -> Self {
        Manager(Lru::with_capacity(limit, capacity))
    }

    /// Create a [Manager] with an optional watermark in addition to weight limit.
    ///
    /// When `watermark` is set, the underlying LRU will also evict to keep total item count
    /// under or equal to that watermark.
    pub fn with_capacity_and_watermark(
        limit: usize,
        capacity: usize,
        watermark: Option<usize>,
    ) -> Self {
        Manager(Lru::with_capacity_and_watermark(limit, capacity, watermark))
    }

    /// Get the number of shards
    pub fn shards(&self) -> usize {
        self.0.shards()
    }

    /// Get the weight (total size) of a specific shard
    pub fn shard_weight(&self, shard: usize) -> usize {
        self.0.shard_weight(shard)
    }

    /// Get the number of items in a specific shard
    pub fn shard_len(&self, shard: usize) -> usize {
        self.0.shard_len(shard)
    }

    /// Get the shard index for a given cache key
    ///
    /// This allows callers to know which shard was affected by an operation
    /// without acquiring any locks.
    pub fn get_shard_for_key(&self, key: &CompactCacheKey) -> usize {
        (u64key(key) % N as u64) as usize
    }

    /// Serialize the given shard
    pub fn serialize_shard(&self, shard: usize) -> Result<Vec<u8>> {
        use rmp_serde::encode::Serializer;
        use serde::ser::SerializeSeq;
        use serde::ser::Serializer as _;

        assert!(shard < N);

        // NOTE: This could use a lot of memory to buffer the serialized data in memory
        // NOTE: This for loop could lock the LRU for too long
        let mut nodes = Vec::with_capacity(self.0.shard_len(shard));
        self.0.iter_for_each(shard, |(node, size)| {
            nodes.push(SerdeHelperNode(node.clone(), size));
        });
        let mut ser = Serializer::new(vec![]);
        let mut seq = ser
            .serialize_seq(Some(self.0.shard_len(shard)))
            .or_err(InternalError, "fail to serialize node")?;
        for node in nodes {
            seq.serialize_element(&node).unwrap(); // write to vec, safe
        }

        seq.end().or_err(InternalError, "when serializing LRU")?;
        Ok(ser.into_inner())
    }

    /// Deserialize a shard
    ///
    /// Shard number is not needed because the key itself will hash to the correct shard.
    pub fn deserialize_shard(&self, buf: &[u8]) -> Result<()> {
        use rmp_serde::decode::Deserializer;
        use serde::de::Deserializer as _;

        let mut de = Deserializer::new(buf);
        let visitor = InsertToManager { lru: self };
        de.deserialize_seq(visitor)
            .or_err(InternalError, "when deserializing LRU")?;
        Ok(())
    }

    /// Peek the weight associated with a cache key without changing its LRU order.
    pub fn peek_weight(&self, item: &CompactCacheKey) -> Option<usize> {
        let key = u64key(item);
        self.0.peek_weight(key)
    }
}

struct InsertToManager<'a, const N: usize> {
    lru: &'a Manager<N>,
}

impl<'de, const N: usize> serde::de::Visitor<'de> for InsertToManager<'_, N> {
    type Value = ();

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("array of lru nodes")
    }

    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'de>,
    {
        while let Some(node) = seq.next_element::<SerdeHelperNode>()? {
            let key = u64key(&node.0);
            self.lru.0.insert_tail(key, node.0, node.1); // insert in the back
        }
        Ok(())
    }
}

#[inline]
fn u64key(key: &CompactCacheKey) -> u64 {
    // note that std hash is not uniform, I'm not sure if ahash is also the case
    let mut hasher = ahash::AHasher::default();
    key.hash(&mut hasher);
    hasher.finish()
}

const FILE_NAME: &str = "lru.data";

#[inline]
fn err_str_path(s: &str, path: &Path) -> String {
    format!("{s} {}", path.display())
}

#[async_trait]
impl<const N: usize> EvictionManager for Manager<N> {
    fn total_size(&self) -> usize {
        self.0.weight()
    }
    fn total_items(&self) -> usize {
        self.0.len()
    }
    fn evicted_size(&self) -> usize {
        self.0.evicted_weight()
    }
    fn evicted_items(&self) -> usize {
        self.0.evicted_len()
    }

    fn admit(
        &self,
        item: CompactCacheKey,
        size: usize,
        _fresh_until: SystemTime,
    ) -> Vec<CompactCacheKey> {
        let key = u64key(&item);
        self.0.admit(key, item, size);
        self.0
            .evict_to_limit()
            .into_iter()
            .map(|(key, _weight)| key)
            .collect()
    }

    fn increment_weight(
        &self,
        item: &CompactCacheKey,
        delta: usize,
        max_weight: Option<usize>,
    ) -> Vec<CompactCacheKey> {
        let key = u64key(item);
        self.0.increment_weight(key, delta, max_weight);
        self.0
            .evict_to_limit()
            .into_iter()
            .map(|(key, _weight)| key)
            .collect()
    }

    fn remove(&self, item: &CompactCacheKey) {
        let key = u64key(item);
        self.0.remove(key);
    }

    fn access(&self, item: &CompactCacheKey, size: usize, _fresh_until: SystemTime) -> bool {
        let key = u64key(item);
        if !self.0.promote(key) {
            self.0.admit(key, item.clone(), size);
            false
        } else {
            true
        }
    }

    fn peek(&self, item: &CompactCacheKey) -> bool {
        let key = u64key(item);
        self.0.peek(key)
    }

    async fn save(&self, dir_path: &str) -> Result<()> {
        let dir_path_str = dir_path.to_owned();

        tokio::task::spawn_blocking(move || {
            let dir_path = Path::new(&dir_path_str);
            std::fs::create_dir_all(dir_path)
                .or_err_with(InternalError, || err_str_path("fail to create", dir_path))
        })
        .await
        .or_err(InternalError, "async blocking IO failure")??;

        for i in 0..N {
            let data = self.serialize_shard(i)?;
            let dir_path = dir_path.to_owned();
            tokio::task::spawn_blocking(move || {
                let dir_path = Path::new(&dir_path);
                let final_path = dir_path.join(format!("{}.{i}", FILE_NAME));
                // create a temporary filename using a randomized u32 hash to minimize the chance of multiple writers writing to the same tmp file
                let random_suffix: u32 = rand::thread_rng().gen();
                let temp_path =
                    dir_path.join(format!("{}.{i}.{:08x}.tmp", FILE_NAME, random_suffix));
                let mut file = File::create(&temp_path)
                    .or_err_with(InternalError, || err_str_path("fail to create", &temp_path))?;
                file.write_all(&data).or_err_with(InternalError, || {
                    err_str_path("fail to write to", &temp_path)
                })?;
                file.flush().or_err_with(InternalError, || {
                    err_str_path("fail to flush temp file", &temp_path)
                })?;
                rename(&temp_path, &final_path).or_err_with(InternalError, || {
                    format!(
                        "Failed to rename file from {} to {}",
                        temp_path.display(),
                        final_path.display(),
                    )
                })
            })
            .await
            .or_err(InternalError, "async blocking IO failure")??;
        }
        Ok(())
    }

    async fn load(&self, dir_path: &str) -> Result<()> {
        // TODO: check the saved shards so that we load all the save files
        let mut loaded_shards = 0;
        for i in 0..N {
            let dir_path = dir_path.to_owned();

            let data = tokio::task::spawn_blocking(move || {
                let file_path = Path::new(&dir_path).join(format!("{}.{i}", FILE_NAME));
                let mut file = File::open(&file_path)
                    .or_err_with(InternalError, || err_str_path("fail to open", &file_path))?;
                let mut buffer = Vec::with_capacity(8192);
                file.read_to_end(&mut buffer)
                    .or_err_with(InternalError, || {
                        err_str_path("fail to read from", &file_path)
                    })?;
                Ok::<Vec<u8>, BError>(buffer)
            })
            .await
            .or_err(InternalError, "async blocking IO failure")??;

            if let Err(e) = self.deserialize_shard(&data) {
                warn!("Failed to deserialize shard {}: {}. Skipping shard.", i, e);
                continue; // Skip shard and move onto the next one
            }
            loaded_shards += 1;
        }

        // Log how many shards were successfully loaded
        if loaded_shards < N {
            warn!(
                "Only loaded {}/{} shards. Cache may be incomplete.",
                loaded_shards, N
            )
        } else {
            info!("Successfully loaded {}/{} shards.", loaded_shards, N)
        }

        cleanup_temp_files(dir_path);

        Ok(())
    }
}

fn cleanup_temp_files(dir_path: &str) {
    let dir_path = Path::new(dir_path).to_owned();

    tokio::task::spawn_blocking({
        move || {
            if !dir_path.exists() {
                return;
            }

            let entries = match std::fs::read_dir(&dir_path) {
                Ok(entries) => entries,
                Err(e) => {
                    warn!("Failed to read directory {}: {e}", dir_path.display());
                    return;
                }
            };

            let mut cleaned_count = 0;
            let mut error_count = 0;

            for entry in entries {
                let entry = match entry {
                    Ok(entry) => entry,
                    Err(e) => {
                        warn!(
                            "Failed to read directory entry in {}: {e}",
                            dir_path.display()
                        );
                        error_count += 1;
                        continue;
                    }
                };

                let file_name = entry.file_name();
                let file_name_str = file_name.to_string_lossy();

                if file_name_str.starts_with(FILE_NAME) && file_name_str.ends_with(".tmp") {
                    match std::fs::remove_file(entry.path()) {
                        Ok(()) => {
                            info!("Cleaned up orphaned temp file: {}", entry.path().display());
                            cleaned_count += 1;
                        }
                        Err(e) => {
                            warn!("Failed to remove temp file {}: {e}", entry.path().display());
                            error_count += 1;
                        }
                    }
                }
            }

            if cleaned_count > 0 || error_count > 0 {
                info!(
                    "Temp file cleanup completed. Removed: {cleaned_count}, Errors: {error_count}"
                );
            }
        }
    });
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::CacheKey;

    // we use shard (N) = 1 for eviction consistency in all tests

    #[test]
    fn test_admission() {
        let lru = Manager::<1>::with_capacity(4, 10);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru si full (4) now

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        // need to reduce used by at least 2, both key1 and key2 are evicted to make room for 3
        assert_eq!(v.len(), 2);
        assert_eq!(v[0], key1);
        assert_eq!(v[1], key2);
    }

    #[test]
    fn test_access() {
        let lru = Manager::<1>::with_capacity(4, 10);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // make key1 most recently used
        lru.access(&key1, 1, until);
        assert_eq!(v.len(), 0);

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key2);
    }

    #[test]
    fn test_remove() {
        let lru = Manager::<1>::with_capacity(4, 10);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // remove key1
        lru.remove(&key1);

        // key2 is the least recently used one now
        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key2);
    }

    #[test]
    fn test_access_add() {
        let lru = Manager::<1>::with_capacity(4, 10);
        let until = SystemTime::now(); // unused value as a placeholder

        let key1 = CacheKey::new("", "a", "1").to_compact();
        lru.access(&key1, 1, until);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        lru.access(&key2, 2, until);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        lru.access(&key3, 2, until);

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        // need to reduce used by at least 2, both key1 and key2 are evicted to make room for 3
        assert_eq!(v.len(), 2);
        assert_eq!(v[0], key1);
        assert_eq!(v[1], key2);
    }

    #[test]
    fn test_admit_update() {
        let lru = Manager::<1>::with_capacity(4, 10);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // update key2 to reduce its size by 1
        let v = lru.admit(key2, 1, until);
        assert_eq!(v.len(), 0);

        // lru is not full anymore
        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4.clone(), 1, until);
        assert_eq!(v.len(), 0);

        // make key4 larger
        let v = lru.admit(key4, 2, until);
        // need to evict now
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key1);
    }

    #[test]
    fn test_peek() {
        let lru = Manager::<1>::with_capacity(4, 10);
        let until = SystemTime::now(); // unused value as a placeholder

        let key1 = CacheKey::new("", "a", "1").to_compact();
        lru.access(&key1, 1, until);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        lru.access(&key2, 2, until);
        assert!(lru.peek(&key1));
        assert!(lru.peek(&key2));
    }

    #[test]
    fn test_serde() {
        let lru = Manager::<1>::with_capacity(4, 10);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // make key1 most recently used
        lru.access(&key1, 1, until);
        assert_eq!(v.len(), 0);

        // load lru2 with lru's data
        let ser = lru.serialize_shard(0).unwrap();
        let lru2 = Manager::<1>::with_capacity(4, 10);
        lru2.deserialize_shard(&ser).unwrap();

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru2.admit(key4, 2, until);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key2);
    }

    #[tokio::test]
    async fn test_save_to_disk() {
        let until = SystemTime::now(); // unused value as a placeholder
        let lru = Manager::<2>::with_capacity(10, 10);

        lru.admit(CacheKey::new("", "a", "1").to_compact(), 1, until);
        lru.admit(CacheKey::new("", "b", "1").to_compact(), 2, until);
        lru.admit(CacheKey::new("", "c", "1").to_compact(), 1, until);
        lru.admit(CacheKey::new("", "d", "1").to_compact(), 1, until);
        lru.admit(CacheKey::new("", "e", "1").to_compact(), 2, until);
        lru.admit(CacheKey::new("", "f", "1").to_compact(), 1, until);

        // load lru2 with lru's data
        lru.save("/tmp/test_lru_save").await.unwrap();
        let lru2 = Manager::<2>::with_capacity(4, 10);
        lru2.load("/tmp/test_lru_save").await.unwrap();

        let ser0 = lru.serialize_shard(0).unwrap();
        let ser1 = lru.serialize_shard(1).unwrap();

        assert_eq!(ser0, lru2.serialize_shard(0).unwrap());
        assert_eq!(ser1, lru2.serialize_shard(1).unwrap());
    }

    #[tokio::test]
    async fn test_temp_file_cleanup() {
        let test_dir = "/tmp/test_lru_cleanup";
        let dir_path = Path::new(test_dir);

        // Create test directory
        std::fs::create_dir_all(dir_path).unwrap();

        // Create some fake temp files
        let temp_files = [
            "lru.data.0.12345678.tmp",
            "lru.data.1.abcdef00.tmp",
            "other_file.tmp", // Should not be removed
            "lru.data.2",     // Should not be removed
        ];

        for file in temp_files {
            let file_path = dir_path.join(file);
            std::fs::write(&file_path, b"test").unwrap();
        }

        // Run cleanup
        cleanup_temp_files(test_dir);

        tokio::time::sleep(core::time::Duration::from_secs(1)).await;

        // Check results
        assert!(!dir_path.join("lru.data.0.12345678.tmp").exists());
        assert!(!dir_path.join("lru.data.1.abcdef00.tmp").exists());
        assert!(dir_path.join("other_file.tmp").exists()); // Should remain
        assert!(dir_path.join("lru.data.2").exists()); // Should remain

        // Cleanup test directory
        std::fs::remove_dir_all(dir_path).unwrap();
    }
}


================================================
FILE: pingora-cache/src/eviction/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Cache eviction module

use crate::key::CompactCacheKey;

use async_trait::async_trait;
use pingora_error::Result;
use std::time::SystemTime;

pub mod lru;
pub mod simple_lru;

/// The trait that a cache eviction algorithm needs to implement
///
/// NOTE: these trait methods require &self not &mut self, which means concurrency should
/// be handled the implementations internally.
#[async_trait]
pub trait EvictionManager: Send + Sync {
    /// Total size of the cache in bytes tracked by this eviction manager
    fn total_size(&self) -> usize;
    /// Number of assets tracked by this eviction manager
    fn total_items(&self) -> usize;
    /// Number of bytes that are already evicted
    ///
    /// The accumulated number is returned to play well with Prometheus counter metric type.
    fn evicted_size(&self) -> usize;
    /// Number of assets that are already evicted
    ///
    /// The accumulated number is returned to play well with Prometheus counter metric type.
    fn evicted_items(&self) -> usize;

    /// Admit an item
    ///
    /// Return one or more items to evict. The sizes of these items are deducted
    /// from the total size already. The caller needs to make sure that these assets are actually
    /// removed from the storage.
    ///
    /// If the item is already admitted, A. update its freshness; B. if the new size is larger than the
    /// existing one, Some(_) might be returned for the caller to evict.
    fn admit(
        &self,
        item: CompactCacheKey,
        size: usize,
        fresh_until: SystemTime,
    ) -> Vec<CompactCacheKey>;

    /// Adjust an item's weight upwards by a delta. If the item is not already admitted,
    /// nothing will happen.
    ///
    /// An optional `max_weight` hint indicates the known max weight of the current key in case the
    /// weight should not be incremented above this amount.
    ///
    /// Return one or more items to evict. The sizes of these items are deducted
    /// from the total size already. The caller needs to make sure that these assets are actually
    /// removed from the storage.
    fn increment_weight(
        &self,
        item: &CompactCacheKey,
        delta: usize,
        max_weight: Option<usize>,
    ) -> Vec<CompactCacheKey>;

    /// Remove an item from the eviction manager.
    ///
    /// The size of the item will be deducted.
    fn remove(&self, item: &CompactCacheKey);

    /// Access an item that should already be in cache.
    ///
    /// If the item is not tracked by this [EvictionManager], track it but no eviction will happen.
    ///
    /// The call used for asking the eviction manager to track the assets that are already admitted
    /// in the cache storage system.
    fn access(&self, item: &CompactCacheKey, size: usize, fresh_until: SystemTime) -> bool;

    /// Peek into the manager to see if the item is already tracked by the system
    ///
    /// This function should have no side-effect on the asset itself. For example, for LRU, this
    /// method shouldn't change the popularity of the asset being peeked.
    fn peek(&self, item: &CompactCacheKey) -> bool;

    /// Serialize to save the state of this eviction manager to disk
    ///
    /// This function is for preserving the eviction manager's state across server restarts.
    ///
    /// `dir_path` define the directory on disk that the data should use.
    // dir_path is &str no AsRef<Path> so that trait objects can be used
    async fn save(&self, dir_path: &str) -> Result<()>;

    /// The counterpart of [Self::save()].
    async fn load(&self, dir_path: &str) -> Result<()>;
}


================================================
FILE: pingora-cache/src/eviction/simple_lru.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! A simple LRU cache manager built on top of the `lru` crate

use super::EvictionManager;
use crate::key::CompactCacheKey;

use async_trait::async_trait;
use lru::LruCache;
use parking_lot::RwLock;
use pingora_error::{BError, ErrorType::*, OrErr, Result};
use rand::Rng;
use serde::de::SeqAccess;
use serde::{Deserialize, Serialize};
use std::collections::hash_map::DefaultHasher;
use std::fs::File;
use std::hash::{Hash, Hasher};
use std::io::prelude::*;
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::SystemTime;

#[derive(Debug, Deserialize, Serialize)]
struct Node {
    key: CompactCacheKey,
    size: usize,
}

/// A simple LRU eviction manager
///
/// The implementation is not optimized. All operations require global locks.
pub struct Manager {
    lru: RwLock<LruCache<u64, Node>>,
    limit: usize,
    items_watermark: Option<usize>,
    used: AtomicUsize,
    items: AtomicUsize,
    evicted_size: AtomicUsize,
    evicted_items: AtomicUsize,
}

impl Manager {
    /// Create a new [Manager] with the given total size limit `limit`.
    pub fn new(limit: usize) -> Self {
        Manager {
            lru: RwLock::new(LruCache::unbounded()),
            limit,
            items_watermark: None,
            used: AtomicUsize::new(0),
            items: AtomicUsize::new(0),
            evicted_size: AtomicUsize::new(0),
            evicted_items: AtomicUsize::new(0),
        }
    }

    /// Create a new [Manager] with optional watermark in addition to size limit `limit`.
    pub fn new_with_watermark(limit: usize, items_watermark: Option<usize>) -> Self {
        Manager {
            lru: RwLock::new(LruCache::unbounded()),
            limit,
            items_watermark,
            used: AtomicUsize::new(0),
            items: AtomicUsize::new(0),
            evicted_size: AtomicUsize::new(0),
            evicted_items: AtomicUsize::new(0),
        }
    }

    fn insert(&self, hash_key: u64, node: CompactCacheKey, size: usize, reverse: bool) {
        use std::cmp::Ordering::*;
        let node = Node { key: node, size };
        let old = {
            let mut lru = self.lru.write();
            let old = lru.push(hash_key, node);
            if reverse && old.is_none() {
                lru.demote(&hash_key);
            }
            old
        };
        if let Some(old) = old {
            // replacing a node, just need to update used size
            match size.cmp(&old.1.size) {
                Greater => self.used.fetch_add(size - old.1.size, Ordering::Relaxed),
                Less => self.used.fetch_sub(old.1.size - size, Ordering::Relaxed),
                Equal => 0, // same size, update nothing, use 0 to match other arms' type
            };
        } else {
            self.used.fetch_add(size, Ordering::Relaxed);
            self.items.fetch_add(1, Ordering::Relaxed);
        }
    }

    fn increase_weight(&self, key: u64, delta: usize) {
        let mut lru = self.lru.write();
        let Some(node) = lru.get_key_value_mut(&key) else {
            return;
        };
        node.1.size += delta;
        self.used.fetch_add(delta, Ordering::Relaxed);
    }

    #[inline]
    fn over_limits(&self) -> bool {
        self.used.load(Ordering::Relaxed) > self.limit
            || self
                .items_watermark
                .is_some_and(|w| self.items.load(Ordering::Relaxed) > w)
    }

    // evict items until the used capacity is below the size limit and watermark count
    fn evict(&self) -> Vec<CompactCacheKey> {
        if self.used.load(Ordering::Relaxed) <= self.limit
            && self
                .items_watermark
                .is_none_or(|w| self.items.load(Ordering::Relaxed) <= w)
        {
            return vec![];
        }

        let mut to_evict = Vec::with_capacity(1); // we will at least pop 1 item

        while self.over_limits() {
            if let Some((_, node)) = self.lru.write().pop_lru() {
                self.used.fetch_sub(node.size, Ordering::Relaxed);
                self.items.fetch_sub(1, Ordering::Relaxed);
                self.evicted_size.fetch_add(node.size, Ordering::Relaxed);
                self.evicted_items.fetch_add(1, Ordering::Relaxed);
                to_evict.push(node.key);
            } else {
                // lru empty
                return to_evict;
            }
        }
        to_evict
    }

    // This could use a lot of memory to buffer the serialized data in memory and could lock the LRU
    // for too long
    fn serialize(&self) -> Result<Vec<u8>> {
        use rmp_serde::encode::Serializer;
        use serde::ser::SerializeSeq;
        use serde::ser::Serializer as _;
        // NOTE: This could use a lot of memory to buffer the serialized data in memory
        let mut ser = Serializer::new(vec![]);
        // NOTE: This long for loop could lock the LRU for too long
        let lru = self.lru.read();
        let mut seq = ser
            .serialize_seq(Some(lru.len()))
            .or_err(InternalError, "fail to serialize node")?;
        for item in lru.iter() {
            seq.serialize_element(item.1).unwrap(); // write to vec, safe
        }
        seq.end().or_err(InternalError, "when serializing LRU")?;
        Ok(ser.into_inner())
    }

    fn deserialize(&self, buf: &[u8]) -> Result<()> {
        use rmp_serde::decode::Deserializer;
        use serde::de::Deserializer as _;
        let mut de = Deserializer::new(buf);
        let visitor = InsertToManager { lru: self };
        de.deserialize_seq(visitor)
            .or_err(InternalError, "when deserializing LRU")?;
        Ok(())
    }
}

struct InsertToManager<'a> {
    lru: &'a Manager,
}

impl<'de> serde::de::Visitor<'de> for InsertToManager<'_> {
    type Value = ();

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        formatter.write_str("array of lru nodes")
    }

    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'de>,
    {
        while let Some(node) = seq.next_element::<Node>()? {
            let key = u64key(&node.key);
            self.lru.insert(key, node.key, node.size, true); // insert in the back
        }
        Ok(())
    }
}

#[inline]
fn u64key(key: &CompactCacheKey) -> u64 {
    let mut hasher = DefaultHasher::new();
    key.hash(&mut hasher);
    hasher.finish()
}

const FILE_NAME: &str = "simple_lru.data";

#[async_trait]
impl EvictionManager for Manager {
    fn total_size(&self) -> usize {
        self.used.load(Ordering::Relaxed)
    }
    fn total_items(&self) -> usize {
        self.items.load(Ordering::Relaxed)
    }
    fn evicted_size(&self) -> usize {
        self.evicted_size.load(Ordering::Relaxed)
    }
    fn evicted_items(&self) -> usize {
        self.evicted_items.load(Ordering::Relaxed)
    }

    fn admit(
        &self,
        item: CompactCacheKey,
        size: usize,
        _fresh_until: SystemTime,
    ) -> Vec<CompactCacheKey> {
        let key = u64key(&item);
        self.insert(key, item, size, false);
        self.evict()
    }

    fn increment_weight(
        &self,
        item: &CompactCacheKey,
        delta: usize,
        _max_weight: Option<usize>,
    ) -> Vec<CompactCacheKey> {
        let key = u64key(item);
        self.increase_weight(key, delta);
        self.evict()
    }

    fn remove(&self, item: &CompactCacheKey) {
        let key = u64key(item);
        let node = self.lru.write().pop(&key);
        if let Some(n) = node {
            self.used.fetch_sub(n.size, Ordering::Relaxed);
            self.items.fetch_sub(1, Ordering::Relaxed);
        }
    }

    fn access(&self, item: &CompactCacheKey, size: usize, _fresh_until: SystemTime) -> bool {
        let key = u64key(item);
        if self.lru.write().get(&key).is_none() {
            self.insert(key, item.clone(), size, false);
            false
        } else {
            true
        }
    }

    fn peek(&self, item: &CompactCacheKey) -> bool {
        let key = u64key(item);
        self.lru.read().peek(&key).is_some()
    }

    async fn save(&self, dir_path: &str) -> Result<()> {
        let data = self.serialize()?;
        let dir_str = dir_path.to_owned();
        tokio::task::spawn_blocking(move || {
            let dir_path = Path::new(&dir_str);
            std::fs::create_dir_all(dir_path)
                .or_err_with(InternalError, || format!("fail to create {dir_str}"))?;

            let final_file_path = dir_path.join(FILE_NAME);
            // create a temporary filename using a randomized u32 hash to minimize the chance of multiple writers writing to the same tmp file
            let random_suffix: u32 = rand::thread_rng().gen();
            let temp_file_path = dir_path.join(format!("{}.{:08x}.tmp", FILE_NAME, random_suffix));
            let mut file = File::create(&temp_file_path).or_err_with(InternalError, || {
                format!("fail to create temporary file {}", temp_file_path.display())
            })?;
            file.write_all(&data).or_err_with(InternalError, || {
                format!("fail to write to {}", temp_file_path.display())
            })?;
            file.flush().or_err_with(InternalError, || {
                format!("fail to flush temp file {}", temp_file_path.display())
            })?;
            std::fs::rename(&temp_file_path, &final_file_path).or_err_with(InternalError, || {
                format!(
                    "fail to rename temporary file {} to {}",
                    temp_file_path.display(),
                    final_file_path.display()
                )
            })
        })
        .await
        .or_err(InternalError, "async blocking IO failure")?
    }

    async fn load(&self, dir_path: &str) -> Result<()> {
        let dir_path = dir_path.to_owned();
        let data = tokio::task::spawn_blocking(move || {
            let file_path = Path::new(&dir_path).join(FILE_NAME);
            let mut file = File::open(file_path.clone()).or_err_with(InternalError, || {
                format!("fail to open {}", file_path.display())
            })?;
            let mut buffer = Vec::with_capacity(8192);
            file.read_to_end(&mut buffer)
                .or_err(InternalError, "fail to read from {file_path}")?;
            Ok::<Vec<u8>, BError>(buffer)
        })
        .await
        .or_err(InternalError, "async blocking IO failure")??;
        self.deserialize(&data)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::CacheKey;

    #[test]
    fn test_admission() {
        let lru = Manager::new(4);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru si full (4) now

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        // need to reduce used by at least 2, both key1 and key2 are evicted to make room for 3
        assert_eq!(v.len(), 2);
        assert_eq!(v[0], key1);
        assert_eq!(v[1], key2);
    }

    #[test]
    fn test_access() {
        let lru = Manager::new(4);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // make key1 most recently used
        lru.access(&key1, 1, until);
        assert_eq!(v.len(), 0);

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key2);
    }

    #[test]
    fn test_remove() {
        let lru = Manager::new(4);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // remove key1
        lru.remove(&key1);

        // key2 is the least recently used one now
        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key2);
    }

    #[test]
    fn test_access_add() {
        let lru = Manager::new(4);
        let until = SystemTime::now(); // unused value as a placeholder

        let key1 = CacheKey::new("", "a", "1").to_compact();
        lru.access(&key1, 1, until);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        lru.access(&key2, 2, until);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        lru.access(&key3, 2, until);

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4, 2, until);
        // need to reduce used by at least 2, both key1 and key2 are evicted to make room for 3
        assert_eq!(v.len(), 2);
        assert_eq!(v[0], key1);
        assert_eq!(v[1], key2);
    }

    #[test]
    fn test_admit_update() {
        let lru = Manager::new(4);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // update key2 to reduce its size by 1
        let v = lru.admit(key2, 1, until);
        assert_eq!(v.len(), 0);

        // lru is not full anymore
        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru.admit(key4.clone(), 1, until);
        assert_eq!(v.len(), 0);

        // make key4 larger
        let v = lru.admit(key4, 2, until);
        // need to evict now
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key1);
    }

    #[test]
    fn test_serde() {
        let lru = Manager::new(4);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // make key1 most recently used
        lru.access(&key1, 1, until);
        assert_eq!(v.len(), 0);

        // load lru2 with lru's data
        let ser = lru.serialize().unwrap();
        let lru2 = Manager::new(4);
        lru2.deserialize(&ser).unwrap();

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru2.admit(key4, 2, until);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key2);
    }

    #[tokio::test]
    async fn test_save_to_disk() {
        let lru = Manager::new(4);
        let key1 = CacheKey::new("", "a", "1").to_compact();
        let until = SystemTime::now(); // unused value as a placeholder
        let v = lru.admit(key1.clone(), 1, until);
        assert_eq!(v.len(), 0);
        let key2 = CacheKey::new("", "b", "1").to_compact();
        let v = lru.admit(key2.clone(), 2, until);
        assert_eq!(v.len(), 0);
        let key3 = CacheKey::new("", "c", "1").to_compact();
        let v = lru.admit(key3, 1, until);
        assert_eq!(v.len(), 0);

        // lru is full (4) now
        // make key1 most recently used
        lru.access(&key1, 1, until);
        assert_eq!(v.len(), 0);

        // load lru2 with lru's data
        lru.save("/tmp/test_simple_lru_save").await.unwrap();
        let lru2 = Manager::new(4);
        lru2.load("/tmp/test_simple_lru_save").await.unwrap();

        let key4 = CacheKey::new("", "d", "1").to_compact();
        let v = lru2.admit(key4, 2, until);
        assert_eq!(v.len(), 1);
        assert_eq!(v[0], key2);
    }

    #[test]
    fn test_watermark_eviction() {
        const SIZE_LIMIT: usize = usize::MAX / 2;
        let lru = Manager::new_with_watermark(SIZE_LIMIT, Some(4));
        let until = SystemTime::now();

        // admit 6 items of size 1
        for name in ["a", "b", "c", "d", "e", "f"] {
            let key = CacheKey::new("", name, "1").to_compact();
            let _ = lru.admit(key, 1, until);
        }

        // test items were evicted due to watermark
        assert_eq!(lru.total_items(), 4);
        assert_eq!(lru.evicted_items(), 2);
        assert_eq!(lru.evicted_size(), 2);
        assert!(lru.total_size() <= SIZE_LIMIT);
    }
}


================================================
FILE: pingora-cache/src/filters.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Utility functions to help process HTTP headers for caching

use super::*;
use crate::cache_control::{CacheControl, Cacheable, InterpretCacheControl};
use crate::RespCacheable::*;

use cache_control::DELTA_SECONDS_OVERFLOW_VALUE;
use http::{header, HeaderValue};
use httpdate::HttpDate;
use log::debug;
use pingora_http::RequestHeader;

/// Decide if the request can be cacheable
pub fn request_cacheable(req_header: &ReqHeader) -> bool {
    // TODO: the check is incomplete
    matches!(req_header.method, Method::GET | Method::HEAD)
}

/// Decide if the response is cacheable.
///
/// `cache_control` is the parsed [CacheControl] from the response header. It is a standalone
/// argument so that caller has the flexibility to choose to use, change or ignore it.
pub fn resp_cacheable(
    cache_control: Option<&CacheControl>,
    mut resp_header: ResponseHeader,
    authorization_present: bool,
    defaults: &CacheMetaDefaults,
) -> RespCacheable {
    let now = SystemTime::now();
    let expire_time = calculate_fresh_until(
        now,
        cache_control,
        &resp_header,
        authorization_present,
        defaults,
    );
    if let Some(fresh_until) = expire_time {
        let (stale_while_revalidate_duration, stale_if_error_duration) =
            calculate_serve_stale_durations(cache_control, defaults);

        if let Some(cc) = cache_control {
            cc.strip_private_headers(&mut resp_header);
        }
        return Cacheable(CacheMeta::new(
            fresh_until,
            now,
            stale_while_revalidate_duration,
            stale_if_error_duration,
            resp_header,
        ));
    }
    Uncacheable(NoCacheReason::OriginNotCache)
}

/// Calculate the [SystemTime] at which the asset expires
///
/// Return None when not cacheable.
pub fn calculate_fresh_until(
    now: SystemTime,
    cache_control: Option<&CacheControl>,
    resp_header: &RespHeader,
    authorization_present: bool,
    defaults: &CacheMetaDefaults,
) -> Option<SystemTime> {
    fn freshness_ttl_to_time(now: SystemTime, fresh: Duration) -> Option<SystemTime> {
        if fresh.is_zero() {
            // ensure that the response is treated as stale
            now.checked_sub(Duration::from_secs(1))
        } else {
            now.checked_add(fresh)
        }
    }

    // A request with Authorization is normally not cacheable, unless Cache-Control allows it
    if authorization_present {
        let uncacheable = cache_control
            .as_ref()
            .is_none_or(|cc| !cc.allow_caching_authorized_req());
        if uncacheable {
            return None;
        }
    }

    let uncacheable = cache_control
        .as_ref()
        .is_some_and(|cc| cc.is_cacheable() == Cacheable::No);
    if uncacheable {
        return None;
    }

    // For TTL check cache-control first, then expires header, then defaults
    cache_control
        .and_then(|cc| {
            cc.fresh_duration()
                .and_then(|ttl| freshness_ttl_to_time(now, ttl))
        })
        .or_else(|| calculate_expires_header_time(resp_header))
        .or_else(|| {
            defaults
                .fresh_sec(resp_header.status)
                .and_then(|ttl| freshness_ttl_to_time(now, ttl))
        })
}

/// Calculate the expire time from the `Expires` header only
pub fn calculate_expires_header_time(resp_header: &RespHeader) -> Option<SystemTime> {
    // according to RFC 7234:
    // https://datatracker.ietf.org/doc/html/rfc7234#section-4.2.1
    // - treat multiple expires headers as invalid
    // https://datatracker.ietf.org/doc/html/rfc7234#section-5.3
    // - "MUST interpret invalid date formats... as representing a time in the past"
    fn parse_expires_value(expires_value: &HeaderValue) -> Option<SystemTime> {
        let expires = expires_value.to_str().ok()?;
        Some(SystemTime::from(
            expires
                .parse::<HttpDate>()
                .map_err(|e| debug!("Invalid HttpDate in Expires: {}, error: {}", expires, e))
                .ok()?,
        ))
    }

    let mut expires_iter = resp_header.headers.get_all("expires").iter();
    let expires_header = expires_iter.next();
    if expires_header.is_none() || expires_iter.next().is_some() {
        return None;
    }
    parse_expires_value(expires_header.unwrap()).or(Some(SystemTime::UNIX_EPOCH))
}

/// Calculates stale-while-revalidate and stale-if-error seconds from Cache-Control or the [CacheMetaDefaults].
pub fn calculate_serve_stale_durations(
    cache_control: Option<&impl InterpretCacheControl>,
    defaults: &CacheMetaDefaults,
) -> (u32, u32) {
    let serve_stale_while_revalidate = cache_control
        .and_then(|cc| cc.serve_stale_while_revalidate_duration())
        .unwrap_or_else(|| Duration::from_secs(defaults.serve_stale_while_revalidate_sec() as u64));
    let serve_stale_if_error = cache_control
        .and_then(|cc| cc.serve_stale_if_error_duration())
        .unwrap_or_else(|| Duration::from_secs(defaults.serve_stale_if_error_sec() as u64));
    (
        serve_stale_while_revalidate
            .as_secs()
            .try_into()
            .unwrap_or(DELTA_SECONDS_OVERFLOW_VALUE),
        serve_stale_if_error
            .as_secs()
            .try_into()
            .unwrap_or(DELTA_SECONDS_OVERFLOW_VALUE),
    )
}

/// Filters to run when sending requests to upstream
pub mod upstream {
    use super::*;

    /// Adjust the request header for cacheable requests
    ///
    /// This filter does the following in order to fetch the entire response to cache
    /// - Convert HEAD to GET
    /// - `If-*` headers are removed
    /// - `Range` header is removed
    ///
    /// When `meta` is set, this function will inject `If-modified-since` according to the `Last-Modified` header
    /// and inject `If-none-match` according to `Etag` header
    pub fn request_filter(req: &mut RequestHeader, meta: Option<&CacheMeta>) {
        // change HEAD to GET, HEAD itself is not semantically cacheable
        if req.method == Method::HEAD {
            req.set_method(Method::GET);
        }

        // remove downstream precondition headers https://datatracker.ietf.org/doc/html/rfc7232#section-3
        // we'd like to cache the 200 not the 304
        req.remove_header(&header::IF_MATCH);
        req.remove_header(&header::IF_NONE_MATCH);
        req.remove_header(&header::IF_MODIFIED_SINCE);
        req.remove_header(&header::IF_UNMODIFIED_SINCE);
        // see below range header
        req.remove_header(&header::IF_RANGE);

        // remove downstream range header as we'd like to cache the entire response (this might change in the future)
        req.remove_header(&header::RANGE);

        // we have a presumably staled response already, add precondition headers for revalidation
        if let Some(m) = meta {
            // rfc7232: "SHOULD send both validators in cache validation" but
            // there have been weird cases that an origin has matching etag but not Last-Modified
            if let Some(since) = m.headers().get(&header::LAST_MODIFIED) {
                req.insert_header(header::IF_MODIFIED_SINCE, since).unwrap();
            }
            if let Some(etag) = m.headers().get(&header::ETAG) {
                req.insert_header(header::IF_NONE_MATCH, etag).unwrap();
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::RespCacheable::Cacheable;
    use http::header::{HeaderName, CACHE_CONTROL, EXPIRES, SET_COOKIE};
    use http::StatusCode;
    use httpdate::fmt_http_date;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    const DEFAULTS: CacheMetaDefaults = CacheMetaDefaults::new(
        |status| {
            match status {
                StatusCode::OK => Some(10),
                StatusCode::NOT_FOUND => Some(5),
                StatusCode::PARTIAL_CONTENT => None,
                _ => Some(1),
            }
            .map(Duration::from_secs)
        },
        0,
        DELTA_SECONDS_OVERFLOW_VALUE, /* "infinite" stale-if-error */
    );

    // Cache nothing, by default
    const BYPASS_CACHE_DEFAULTS: CacheMetaDefaults = CacheMetaDefaults::new(|_| None, 0, 0);

    fn build_response(status: u16, headers: &[(HeaderName, &str)]) -> ResponseHeader {
        let mut header = ResponseHeader::build(status, Some(headers.len())).unwrap();
        for (k, v) in headers {
            header.append_header(k.to_string(), *v).unwrap();
        }
        header
    }

    fn resp_cacheable_wrapper(
        resp: ResponseHeader,
        defaults: &CacheMetaDefaults,
        authorization_present: bool,
    ) -> Option<CacheMeta> {
        if let Cacheable(meta) = resp_cacheable(
            CacheControl::from_resp_headers(&resp).as_ref(),
            resp,
            authorization_present,
            defaults,
        ) {
            Some(meta)
        } else {
            None
        }
    }

    #[test]
    fn test_resp_cacheable() {
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "max-age=12345")]),
            &DEFAULTS,
            false,
        );

        let meta = meta.unwrap();
        assert!(meta.is_fresh(SystemTime::now()));
        assert!(meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(12))
                .unwrap()
        ),);
        assert!(!meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(12346))
                .unwrap()
        ));
    }

    #[test]
    fn test_resp_uncacheable_directives() {
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "private, max-age=12345")]),
            &DEFAULTS,
            false,
        );
        assert!(meta.is_none());

        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "no-store, max-age=12345")]),
            &DEFAULTS,
            false,
        );
        assert!(meta.is_none());
    }

    #[test]
    fn test_resp_cache_authorization() {
        let meta = resp_cacheable_wrapper(build_response(200, &[]), &DEFAULTS, true);
        assert!(meta.is_none());

        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "max-age=10")]),
            &DEFAULTS,
            true,
        );
        assert!(meta.is_none());

        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "s-maxage=10")]),
            &DEFAULTS,
            true,
        );
        assert!(meta.unwrap().is_fresh(SystemTime::now()));

        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "public, max-age=10")]),
            &DEFAULTS,
            true,
        );
        assert!(meta.unwrap().is_fresh(SystemTime::now()));

        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "must-revalidate")]),
            &DEFAULTS,
            true,
        );
        assert!(meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_zero_max_age() {
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "max-age=0, public")]),
            &DEFAULTS,
            false,
        );

        // cacheable, but needs revalidation
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_expires() {
        let five_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(5))
            .unwrap();

        // future expires is cacheable
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(EXPIRES, &fmt_http_date(five_sec_time))]),
            &DEFAULTS,
            false,
        );

        let meta = meta.unwrap();
        assert!(meta.is_fresh(SystemTime::now()));
        assert!(!meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(6))
                .unwrap()
        ));

        // even on default uncacheable statuses
        let meta = resp_cacheable_wrapper(
            build_response(206, &[(EXPIRES, &fmt_http_date(five_sec_time))]),
            &DEFAULTS,
            false,
        );
        assert!(meta.is_some());
    }

    #[test]
    fn test_resp_past_expires() {
        // cacheable, but expired
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(EXPIRES, "Fri, 15 May 2015 15:34:21 GMT")]),
            &BYPASS_CACHE_DEFAULTS,
            false,
        );
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_nonstandard_expires() {
        // init log to allow inspecting warnings
        init_log();

        // invalid cases, according to parser
        // (but should be stale according to RFC)
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(EXPIRES, "Mon, 13 Feb 0002 12:00:00 GMT")]),
            &BYPASS_CACHE_DEFAULTS,
            false,
        );
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));

        let meta = resp_cacheable_wrapper(
            build_response(200, &[(EXPIRES, "Fri, 01 Dec 99999 16:00:00 GMT")]),
            &BYPASS_CACHE_DEFAULTS,
            false,
        );
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));

        let meta = resp_cacheable_wrapper(
            build_response(200, &[(EXPIRES, "0")]),
            &BYPASS_CACHE_DEFAULTS,
            false,
        );
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_multiple_expires() {
        let five_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(5))
            .unwrap();
        let ten_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(10))
            .unwrap();

        // multiple expires = uncacheable
        let meta = resp_cacheable_wrapper(
            build_response(
                200,
                &[
                    (EXPIRES, &fmt_http_date(five_sec_time)),
                    (EXPIRES, &fmt_http_date(ten_sec_time)),
                ],
            ),
            &BYPASS_CACHE_DEFAULTS,
            false,
        );
        assert!(meta.is_none());

        // unless the default is cacheable
        let meta = resp_cacheable_wrapper(
            build_response(
                200,
                &[
                    (EXPIRES, &fmt_http_date(five_sec_time)),
                    (EXPIRES, &fmt_http_date(ten_sec_time)),
                ],
            ),
            &DEFAULTS,
            false,
        );
        assert!(meta.is_some());
    }

    #[test]
    fn test_resp_cache_control_with_expires() {
        let five_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(5))
            .unwrap();
        // cache-control takes precedence over expires
        let meta = resp_cacheable_wrapper(
            build_response(
                200,
                &[
                    (EXPIRES, &fmt_http_date(five_sec_time)),
                    (CACHE_CONTROL, "max-age=0"),
                ],
            ),
            &DEFAULTS,
            false,
        );
        assert!(!meta.unwrap().is_fresh(SystemTime::now()));
    }

    #[test]
    fn test_resp_stale_while_revalidate() {
        // respect defaults
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "max-age=10")]),
            &DEFAULTS,
            false,
        );

        let meta = meta.unwrap();
        let eleven_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(11))
            .unwrap();
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(!meta.serve_stale_while_revalidate(SystemTime::now()));
        assert!(!meta.serve_stale_while_revalidate(eleven_sec_time));

        // override with stale-while-revalidate
        let meta = resp_cacheable_wrapper(
            build_response(
                200,
                &[(CACHE_CONTROL, "max-age=10, stale-while-revalidate=5")],
            ),
            &DEFAULTS,
            false,
        );

        let meta = meta.unwrap();
        let eleven_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(11))
            .unwrap();
        let sixteen_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(16))
            .unwrap();
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(meta.serve_stale_while_revalidate(eleven_sec_time));
        assert!(!meta.serve_stale_while_revalidate(sixteen_sec_time));
    }

    #[test]
    fn test_resp_stale_if_error() {
        // respect defaults
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "max-age=10")]),
            &DEFAULTS,
            false,
        );

        let meta = meta.unwrap();
        let fifty_years_time = SystemTime::now()
            .checked_add(Duration::from_secs(86400 * 365 * 50))
            .unwrap();
        assert!(!meta.is_fresh(fifty_years_time));
        assert!(meta.serve_stale_if_error(fifty_years_time));

        // override with stale-if-error
        let meta = resp_cacheable_wrapper(
            build_response(
                200,
                &[(
                    CACHE_CONTROL,
                    "max-age=10, stale-while-revalidate=5, stale-if-error=60",
                )],
            ),
            &DEFAULTS,
            false,
        );

        let meta = meta.unwrap();
        let eleven_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(11))
            .unwrap();
        let seventy_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(70))
            .unwrap();
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(meta.serve_stale_if_error(SystemTime::now()));
        assert!(meta.serve_stale_if_error(eleven_sec_time));
        assert!(!meta.serve_stale_if_error(seventy_sec_time));

        // never serve stale
        let meta = resp_cacheable_wrapper(
            build_response(200, &[(CACHE_CONTROL, "max-age=10, stale-if-error=0")]),
            &DEFAULTS,
            false,
        );

        let meta = meta.unwrap();
        let eleven_sec_time = SystemTime::now()
            .checked_add(Duration::from_secs(11))
            .unwrap();
        assert!(!meta.is_fresh(eleven_sec_time));
        assert!(!meta.serve_stale_if_error(eleven_sec_time));
    }

    #[test]
    fn test_resp_status_cache_defaults() {
        // 200 response
        let meta = resp_cacheable_wrapper(build_response(200, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(9))
                .unwrap()
        ));
        assert!(!meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(11))
                .unwrap()
        ));

        // 404 response, different ttl
        let meta = resp_cacheable_wrapper(build_response(404, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(4))
                .unwrap()
        ));
        assert!(!meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(6))
                .unwrap()
        ));

        // 206 marked uncacheable (no cache TTL)
        let meta = resp_cacheable_wrapper(build_response(206, &[]), &DEFAULTS, false);
        assert!(meta.is_none());

        // default uncacheable status with explicit Cache-Control is cacheable
        let meta = resp_cacheable_wrapper(
            build_response(206, &[(CACHE_CONTROL, "public, max-age=10")]),
            &DEFAULTS,
            false,
        );
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(9))
                .unwrap()
        ));
        assert!(!meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(11))
                .unwrap()
        ));

        // 416 matches any status
        let meta = resp_cacheable_wrapper(build_response(416, &[]), &DEFAULTS, false);
        assert!(meta.is_some());

        let meta = meta.unwrap();
        assert!(meta.is_fresh(SystemTime::now()));
        assert!(!meta.is_fresh(
            SystemTime::now()
                .checked_add(Duration::from_secs(2))
                .unwrap()
        ));
    }

    #[test]
    fn test_resp_cache_no_cache_fields() {
        // check #field-names are stripped from the cache header
        let meta = resp_cacheable_wrapper(
            build_response(
                200,
                &[
                    (SET_COOKIE, "my-cookie"),
                    (CACHE_CONTROL, "private=\"something\", max-age=10"),
                    (HeaderName::from_bytes(b"Something").unwrap(), "foo"),
                ],
            ),
            &DEFAULTS,
            false,
        );
        let meta = meta.unwrap();
        assert!(meta.headers().contains_key(SET_COOKIE));
        assert!(!meta.headers().contains_key("Something"));

        let meta = resp_cacheable_wrapper(
            build_response(
                200,
                &[
                    (SET_COOKIE, "my-cookie"),
                    (
                        CACHE_CONTROL,
                        "max-age=0, no-cache=\"meta1, SeT-Cookie ,meta2\"",
                    ),
                    (HeaderName::from_bytes(b"meta1").unwrap(), "foo"),
                ],
            ),
            &DEFAULTS,
            false,
        );
        let meta = meta.unwrap();
        assert!(!meta.headers().contains_key(SET_COOKIE));
        assert!(!meta.headers().contains_key("meta1"));
    }
}


================================================
FILE: pingora-cache/src/hashtable.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Concurrent hash tables and LRUs

use lru::LruCache;
use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
use std::collections::HashMap;

// There are probably off-the-shelf crates of this, DashMap?
/// A hash table that shards to a constant number of tables to reduce lock contention
#[derive(Debug)]
pub struct ConcurrentHashTable<V, const N: usize> {
    tables: [RwLock<HashMap<u128, V>>; N],
}

#[inline]
fn get_shard(key: u128, n_shards: usize) -> usize {
    (key % n_shards as u128) as usize
}

impl<V, const N: usize> ConcurrentHashTable<V, N>
where
    [RwLock<HashMap<u128, V>>; N]: Default,
{
    pub fn new() -> Self {
        ConcurrentHashTable {
            tables: Default::default(),
        }
    }
    pub fn get(&self, key: u128) -> &RwLock<HashMap<u128, V>> {
        &self.tables[get_shard(key, N)]
    }

    #[allow(dead_code)]
    pub fn get_shard_at_idx(&self, idx: usize) -> Option<&RwLock<HashMap<u128, V>>> {
        self.tables.get(idx)
    }

    #[allow(dead_code)]
    pub fn read(&self, key: u128) -> RwLockReadGuard<'_, HashMap<u128, V>> {
        self.get(key).read()
    }

    pub fn write(&self, key: u128) -> RwLockWriteGuard<'_, HashMap<u128, V>> {
        self.get(key).write()
    }

    #[allow(dead_code)]
    pub fn for_each<F>(&self, mut f: F)
    where
        F: FnMut(&u128, &V),
    {
        for shard in &self.tables {
            let guard = shard.read();
            for (key, value) in guard.iter() {
                f(key, value);
            }
        }
    }

    // TODO: work out the lifetimes to provide get/set directly
}

impl<V, const N: usize> Default for ConcurrentHashTable<V, N>
where
    [RwLock<HashMap<u128, V>>; N]: Default,
{
    fn default() -> Self {
        Self::new()
    }
}

#[doc(hidden)] // not need in public API
pub struct LruShard<V>(RwLock<LruCache<u128, V>>);
impl<V> Default for LruShard<V> {
    fn default() -> Self {
        // help satisfy default construction of arrays
        LruShard(RwLock::new(LruCache::unbounded()))
    }
}

/// Sharded concurrent data structure for LruCache
pub struct ConcurrentLruCache<V, const N: usize> {
    lrus: [LruShard<V>; N],
}

impl<V, const N: usize> ConcurrentLruCache<V, N>
where
    [LruShard<V>; N]: Default,
{
    pub fn new(shard_capacity: usize) -> Self {
        use std::num::NonZeroUsize;
        // safe, 1 != 0
        const ONE: NonZeroUsize = NonZeroUsize::new(1).unwrap();
        let mut cache = ConcurrentLruCache {
            lrus: Default::default(),
        };
        for lru in &mut cache.lrus {
            lru.0
                .write()
                .resize(shard_capacity.try_into().unwrap_or(ONE));
        }
        cache
    }
    pub fn get(&self, key: u128) -> &RwLock<LruCache<u128, V>> {
        &self.lrus[get_shard(key, N)].0
    }

    #[allow(dead_code)]
    pub fn read(&self, key: u128) -> RwLockReadGuard<'_, LruCache<u128, V>> {
        self.get(key).read()
    }

    pub fn write(&self, key: u128) -> RwLockWriteGuard<'_, LruCache<u128, V>> {
        self.get(key).write()
    }

    // TODO: work out the lifetimes to provide get/set directly
}


================================================
FILE: pingora-cache/src/key.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Cache key

use blake2::{Blake2b, Digest};
use http::Extensions;
use serde::{Deserialize, Serialize};
use std::fmt::{Display, Formatter, Result as FmtResult};

// 16-byte / 128-bit key: large enough to avoid collision
const KEY_SIZE: usize = 16;

/// An 128 bit hash binary
pub type HashBinary = [u8; KEY_SIZE];

fn hex2str(hex: &[u8]) -> String {
    use std::fmt::Write;
    let mut s = String::with_capacity(KEY_SIZE * 2);
    for c in hex {
        write!(s, "{:02x}", c).unwrap(); // safe, just dump hex to string
    }
    s
}

/// Decode the hex str into [HashBinary].
///
/// Return `None` when the decode fails or the input is not exact 32 (to decode to 16 bytes).
pub fn str2hex(s: &str) -> Option<HashBinary> {
    if s.len() != KEY_SIZE * 2 {
        return None;
    }
    let mut output = [0; KEY_SIZE];
    // no need to bubble the error, it should be obvious why the decode fails
    hex::decode_to_slice(s.as_bytes(), &mut output).ok()?;
    Some(output)
}

/// The trait for cache key
pub trait CacheHashKey {
    /// Return the hash of the cache key
    fn primary_bin(&self) -> HashBinary;

    /// Return the variance hash of the cache key.
    ///
    /// `None` if no variance.
    fn variance_bin(&self) -> Option<HashBinary>;

    /// Return the hash including both primary and variance keys
    fn combined_bin(&self) -> HashBinary {
        let key = self.primary_bin();
        if let Some(v) = self.variance_bin() {
            let mut hasher = Blake2b128::new();
            hasher.update(key);
            hasher.update(v);
            hasher.finalize().into()
        } else {
            // if there is no variance, combined_bin should return the same as primary_bin
            key
        }
    }

    /// An extra tag for identifying users
    ///
    /// For example, if the storage backend implements per user quota, this tag can be used.
    fn user_tag(&self) -> &str;

    /// The hex string of [Self::primary_bin()]
    fn primary(&self) -> String {
        hex2str(&self.primary_bin())
    }

    /// The hex string of [Self::variance_bin()]
    fn variance(&self) -> Option<String> {
        self.variance_bin().as_ref().map(|b| hex2str(&b[..]))
    }

    /// The hex string of [Self::combined_bin()]
    fn combined(&self) -> String {
        hex2str(&self.combined_bin())
    }
}

/// General purpose cache key
#[derive(Debug, Clone)]
pub struct CacheKey {
    // Namespace and primary fields are essentially strings,
    // except they allow invalid UTF-8 sequences.
    // These fields should be able to be hashed.
    namespace: Vec<u8>,
    primary: Vec<u8>,
    primary_bin_override: Option<HashBinary>,
    variance: Option<HashBinary>,
    /// An extra tag for identifying users
    ///
    /// For example, if the storage backend implements per user quota, this tag can be used.
    pub user_tag: String,

    /// Grab-bag for user-defined extensions. These will not be persisted to disk.
    pub extensions: Extensions,
}

impl CacheKey {
    /// Set the value of the variance hash
    pub fn set_variance_key(&mut self, key: HashBinary) {
        self.variance = Some(key)
    }

    /// Get the value of the variance hash
    pub fn get_variance_key(&self) -> Option<&HashBinary> {
        self.variance.as_ref()
    }

    /// Removes the variance from this cache key
    pub fn remove_variance_key(&mut self) {
        self.variance = None
    }

    /// Override the primary key hash
    pub fn set_primary_bin_override(&mut self, key: HashBinary) {
        self.primary_bin_override = Some(key)
    }

    /// Try to get primary key as UTF-8 str, if valid
    pub fn primary_key_str(&self) -> Option<&str> {
        std::str::from_utf8(&self.primary).ok()
    }

    /// Try to get namespace key as UTF-8 str, if valid
    pub fn namespace_str(&self) -> Option<&str> {
        std::str::from_utf8(&self.namespace).ok()
    }
}

/// Storage optimized cache key to keep in memory or in storage
// 16 bytes + 8 bytes (+16 * u8) + user_tag.len() + 16 Bytes (Box<str>)
#[derive(Debug, Deserialize, Serialize, Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct CompactCacheKey {
    pub primary: HashBinary,
    // save 8 bytes for non-variance but waste 8 bytes for variance vs, store flat 16 bytes
    pub variance: Option<Box<HashBinary>>,
    pub user_tag: Box<str>, // the len should be small to keep memory usage bounded
}

impl Display for CompactCacheKey {
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        write!(f, "{}", hex2str(&self.primary))?;
        if let Some(var) = &self.variance {
            write!(f, ", variance: {}", hex2str(var.as_ref()))?;
        }
        write!(f, ", user_tag: {}", self.user_tag)
    }
}

impl CacheHashKey for CompactCacheKey {
    fn primary_bin(&self) -> HashBinary {
        self.primary
    }

    fn variance_bin(&self) -> Option<HashBinary> {
        self.variance.as_ref().map(|s| *s.as_ref())
    }

    fn user_tag(&self) -> &str {
        &self.user_tag
    }
}

/*
 * We use blake2 hashing, which is faster and more secure, to replace md5.
 * We have not given too much thought on whether non-crypto hash can be safely
 * use because hashing performance is not critical.
 * Note: we should avoid hashes like ahash which does not have consistent output
 * across machines because it is designed purely for in memory hashtable
*/

// hash output: we use 128 bits (16 bytes) hash which will map to 32 bytes hex string
pub(crate) type Blake2b128 = Blake2b<blake2::digest::consts::U16>;

/// helper function: hash str to u8
pub fn hash_u8(key: &str) -> u8 {
    let mut hasher = Blake2b128::new();
    hasher.update(key);
    let raw = hasher.finalize();
    raw[0]
}

/// helper function: hash key (String or Bytes) to [HashBinary]
pub fn hash_key<K: AsRef<[u8]>>(key: K) -> HashBinary {
    let mut hasher = Blake2b128::new();
    hasher.update(key.as_ref());
    let raw = hasher.finalize();
    raw.into()
}

impl CacheKey {
    fn primary_hasher(&self) -> Blake2b128 {
        let mut hasher = Blake2b128::new();
        hasher.update(&self.namespace);
        hasher.update(&self.primary);
        hasher
    }

    /// Create a new [CacheKey] from the given namespace, primary, and user_tag input.
    ///
    /// Both `namespace` and `primary` will be used for the primary hash
    pub fn new<B1, B2, S>(namespace: B1, primary: B2, user_tag: S) -> Self
    where
        B1: Into<Vec<u8>>,
        B2: Into<Vec<u8>>,
        S: Into<String>,
    {
        CacheKey {
            namespace: namespace.into(),
            primary: primary.into(),
            primary_bin_override: None,
            variance: None,
            user_tag: user_tag.into(),
            extensions: Extensions::new(),
        }
    }

    /// Return the namespace of this key
    pub fn namespace(&self) -> &[u8] {
        &self.namespace[..]
    }

    /// Return the primary key of this key
    pub fn primary_key(&self) -> &[u8] {
        &self.primary[..]
    }

    /// Convert this key to [CompactCacheKey].
    pub fn to_compact(&self) -> CompactCacheKey {
        let primary = self.primary_bin();
        CompactCacheKey {
            primary,
            variance: self.variance_bin().map(Box::new),
            user_tag: self.user_tag.clone().into_boxed_str(),
        }
    }
}

impl CacheHashKey for CacheKey {
    fn primary_bin(&self) -> HashBinary {
        if let Some(primary_bin_override) = self.primary_bin_override {
            primary_bin_override
        } else {
            self.primary_hasher().finalize().into()
        }
    }

    fn variance_bin(&self) -> Option<HashBinary> {
        self.variance
    }

    fn user_tag(&self) -> &str {
        &self.user_tag
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cache_key_hash() {
        let key = CacheKey {
            namespace: Vec::new(),
            primary: b"aa".to_vec(),
            primary_bin_override: None,
            variance: None,
            user_tag: "1".into(),
            extensions: Extensions::new(),
        };
        let hash = key.primary();
        assert_eq!(hash, "ac10f2aef117729f8dad056b3059eb7e");
        assert!(key.variance().is_none());
        assert_eq!(key.combined(), hash);
        let compact = key.to_compact();
        assert_eq!(compact.primary(), hash);
        assert!(compact.variance().is_none());
        assert_eq!(compact.combined(), hash);
    }

    #[test]
    fn test_cache_key_hash_override() {
        let mut key = CacheKey {
            namespace: Vec::new(),
            primary: b"aa".to_vec(),
            primary_bin_override: str2hex("27c35e6e9373877f29e562464e46497e"),
            variance: None,
            user_tag: "1".into(),
            extensions: Extensions::new(),
        };
        let hash = key.primary();
        assert_eq!(hash, "27c35e6e9373877f29e562464e46497e");
        assert!(key.variance().is_none());
        assert_eq!(key.combined(), hash);
        let compact = key.to_compact();
        assert_eq!(compact.primary(), hash);
        assert!(compact.variance().is_none());
        assert_eq!(compact.combined(), hash);

        // make sure set_primary_bin_override overrides the primary key hash correctly
        key.set_primary_bin_override(str2hex("004174d3e75a811a5b44c46b3856f3ee").unwrap());
        let hash = key.primary();
        assert_eq!(hash, "004174d3e75a811a5b44c46b3856f3ee");
        assert!(key.variance().is_none());
        assert_eq!(key.combined(), hash);
        let compact = key.to_compact();
        assert_eq!(compact.primary(), hash);
        assert!(compact.variance().is_none());
        assert_eq!(compact.combined(), hash);
    }

    #[test]
    fn test_cache_key_vary_hash() {
        let key = CacheKey {
            namespace: Vec::new(),
            primary: b"aa".to_vec(),
            primary_bin_override: None,
            variance: Some([0u8; 16]),
            user_tag: "1".into(),
            extensions: Extensions::new(),
        };
        let hash = key.primary();
        assert_eq!(hash, "ac10f2aef117729f8dad056b3059eb7e");
        assert_eq!(key.variance().unwrap(), "00000000000000000000000000000000");
        assert_eq!(key.combined(), "004174d3e75a811a5b44c46b3856f3ee");
        let compact = key.to_compact();
        assert_eq!(compact.primary(), "ac10f2aef117729f8dad056b3059eb7e");
        assert_eq!(
            compact.variance().unwrap(),
            "00000000000000000000000000000000"
        );
        assert_eq!(compact.combined(), "004174d3e75a811a5b44c46b3856f3ee");
    }

    #[test]
    fn test_cache_key_vary_hash_override() {
        let key = CacheKey {
            namespace: Vec::new(),
            primary: b"saaaad".to_vec(),
            primary_bin_override: str2hex("ac10f2aef117729f8dad056b3059eb7e"),
            variance: Some([0u8; 16]),
            user_tag: "1".into(),
            extensions: Extensions::new(),
        };
        let hash = key.primary();
        assert_eq!(hash, "ac10f2aef117729f8dad056b3059eb7e");
        assert_eq!(key.variance().unwrap(), "00000000000000000000000000000000");
        assert_eq!(key.combined(), "004174d3e75a811a5b44c46b3856f3ee");
        let compact = key.to_compact();
        assert_eq!(compact.primary(), "ac10f2aef117729f8dad056b3059eb7e");
        assert_eq!(
            compact.variance().unwrap(),
            "00000000000000000000000000000000"
        );
        assert_eq!(compact.combined(), "004174d3e75a811a5b44c46b3856f3ee");
    }

    #[test]
    fn test_hex_str() {
        let mut key = [0; KEY_SIZE];
        for (i, v) in key.iter_mut().enumerate() {
            // key: [0, 1, 2, .., 15]
            *v = i as u8;
        }
        let hex_str = hex2str(&key);
        let key2 = str2hex(&hex_str).unwrap();
        for i in 0..KEY_SIZE {
            assert_eq!(key[i], key2[i]);
        }
    }
    #[test]
    fn test_primary_key_str_valid_utf8() {
        let valid_utf8_key = CacheKey {
            namespace: Vec::new(),
            primary: b"/valid/path?query=1".to_vec(),
            primary_bin_override: None,
            variance: None,
            user_tag: "1".into(),
            extensions: Extensions::new(),
        };

        assert_eq!(
            valid_utf8_key.primary_key_str(),
            Some("/valid/path?query=1")
        )
    }

    #[test]
    fn test_primary_key_str_invalid_utf8() {
        let invalid_utf8_key = CacheKey {
            namespace: Vec::new(),
            primary: vec![0x66, 0x6f, 0x6f, 0xff],
            primary_bin_override: None,
            variance: None,
            user_tag: "1".into(),
            extensions: Extensions::new(),
        };

        assert!(invalid_utf8_key.primary_key_str().is_none())
    }
}


================================================
FILE: pingora-cache/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The HTTP caching layer for proxies.

#![allow(clippy::new_without_default)]

use cf_rustracing::tag::Tag;
use http::{method::Method, request::Parts as ReqHeader, response::Parts as RespHeader};
use key::{CacheHashKey, CompactCacheKey, HashBinary};
use lock::WritePermit;
use log::warn;
use pingora_error::Result;
use pingora_http::ResponseHeader;
use pingora_timeout::timeout;
use std::time::{Duration, Instant, SystemTime};
use storage::MissFinishType;
use strum::IntoStaticStr;
use trace::{CacheTraceCTX, Span};

pub mod cache_control;
pub mod eviction;
pub mod filters;
pub mod hashtable;
pub mod key;
pub mod lock;
pub mod max_file_size;
mod memory;
pub mod meta;
pub mod predictor;
pub mod put;
pub mod storage;
pub mod trace;
mod variance;

use crate::max_file_size::MaxFileSizeTracker;
pub use key::CacheKey;
use lock::{CacheKeyLockImpl, LockStatus, Locked};
pub use memory::MemCache;
pub use meta::{set_compression_dict_content, set_compression_dict_path};
pub use meta::{CacheMeta, CacheMetaDefaults};
pub use storage::{HitHandler, MissHandler, PurgeType, Storage};
pub use variance::VarianceBuilder;

pub mod prelude {}

/// The state machine for http caching
///
/// This object is used to handle the state and transitions for HTTP caching through the life of a
/// request.
pub struct HttpCache {
    phase: CachePhase,
    // Box the rest so that a disabled HttpCache struct is small
    inner: Option<Box<HttpCacheInner>>,
    digest: HttpCacheDigest,
}

/// This reflects the phase of HttpCache during the lifetime of a request
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CachePhase {
    /// Cache disabled, with reason (NeverEnabled if never explicitly used)
    Disabled(NoCacheReason),
    /// Cache enabled but nothing is set yet
    Uninit,
    /// Cache was enabled, the request decided not to use it
    // HttpCache.inner_enabled is kept
    Bypass,
    /// Awaiting the cache key to be generated
    CacheKey,
    /// Cache hit
    Hit,
    /// No cached asset is found
    Miss,
    /// A staled (expired) asset is found
    Stale,
    /// A staled (expired) asset was found, but another request is revalidating it
    StaleUpdating,
    /// A staled (expired) asset was found, so a fresh one was fetched
    Expired,
    /// A staled (expired) asset was found, and it was revalidated to be fresh
    Revalidated,
    /// Revalidated, but deemed uncacheable, so we do not freshen it
    RevalidatedNoCache(NoCacheReason),
}

impl CachePhase {
    /// Convert [CachePhase] as `str`, for logging and debugging.
    pub fn as_str(&self) -> &'static str {
        match self {
            CachePhase::Disabled(_) => "disabled",
            CachePhase::Uninit => "uninitialized",
            CachePhase::Bypass => "bypass",
            CachePhase::CacheKey => "key",
            CachePhase::Hit => "hit",
            CachePhase::Miss => "miss",
            CachePhase::Stale => "stale",
            CachePhase::StaleUpdating => "stale-updating",
            CachePhase::Expired => "expired",
            CachePhase::Revalidated => "revalidated",
            CachePhase::RevalidatedNoCache(_) => "revalidated-nocache",
        }
    }
}

/// The possible reasons for not caching
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum NoCacheReason {
    /// Caching is not enabled to begin with
    NeverEnabled,
    /// Origin directives indicated this was not cacheable
    OriginNotCache,
    /// Response size was larger than the cache's configured maximum asset size
    ResponseTooLarge,
    /// Disabling caching due to unknown body size and previously exceeding maximum asset size;
    /// the asset is otherwise cacheable, but cache needs to confirm the final size of the asset
    /// before it can mark it as cacheable again.
    PredictedResponseTooLarge,
    /// Due to internal caching storage error
    StorageError,
    /// Due to other types of internal issues
    InternalError,
    /// will be cacheable but skip cache admission now
    ///
    /// This happens when the cache predictor predicted that this request is not cacheable, but
    /// the response turns out to be OK to cache. However, it might be too large to re-enable caching
    /// for this request
    Deferred,
    /// Due to the proxy upstream filter declining the current request from going upstream
    DeclinedToUpstream,
    /// Due to the upstream being unreachable or otherwise erroring during proxying
    UpstreamError,
    /// The writer of the cache lock sees that the request is not cacheable (Could be OriginNotCache)
    CacheLockGiveUp,
    /// This request waited too long for the writer of the cache lock to finish, so this request will
    /// fetch from the origin without caching
    CacheLockTimeout,
    /// Other custom defined reasons
    Custom(&'static str),
}

impl NoCacheReason {
    /// Convert [NoCacheReason] as `str`, for logging and debugging.
    pub fn as_str(&self) -> &'static str {
        use NoCacheReason::*;
        match self {
            NeverEnabled => "NeverEnabled",
            OriginNotCache => "OriginNotCache",
            ResponseTooLarge => "ResponseTooLarge",
            PredictedResponseTooLarge => "PredictedResponseTooLarge",
            StorageError => "StorageError",
            InternalError => "InternalError",
            Deferred => "Deferred",
            DeclinedToUpstream => "DeclinedToUpstream",
            UpstreamError => "UpstreamError",
            CacheLockGiveUp => "CacheLockGiveUp",
            CacheLockTimeout => "CacheLockTimeout",
            Custom(s) => s,
        }
    }
}

/// Information collected about the caching operation that will not be cleared
#[derive(Debug, Default)]
pub struct HttpCacheDigest {
    pub lock_duration: Option<Duration>,
    // time spent in cache lookup and reading the header
    pub lookup_duration: Option<Duration>,
}

/// Convenience function to add a duration to an optional duration
fn add_duration_to_opt(target_opt: &mut Option<Duration>, to_add: Duration) {
    *target_opt = Some(target_opt.map_or(to_add, |existing| existing + to_add));
}

impl HttpCacheDigest {
    fn add_lookup_duration(&mut self, extra_lookup_duration: Duration) {
        add_duration_to_opt(&mut self.lookup_duration, extra_lookup_duration)
    }

    fn add_lock_duration(&mut self, extra_lock_duration: Duration) {
        add_duration_to_opt(&mut self.lock_duration, extra_lock_duration)
    }
}

/// Response cacheable decision
///
///
#[derive(Debug)]
pub enum RespCacheable {
    Cacheable(CacheMeta),
    Uncacheable(NoCacheReason),
}

impl RespCacheable {
    /// Whether it is cacheable
    #[inline]
    pub fn is_cacheable(&self) -> bool {
        matches!(*self, Self::Cacheable(_))
    }

    /// Unwrap [RespCacheable] to get the [CacheMeta] stored
    /// # Panic
    /// Panic when this object is not cacheable. Check [Self::is_cacheable()] first.
    pub fn unwrap_meta(self) -> CacheMeta {
        match self {
            Self::Cacheable(meta) => meta,
            Self::Uncacheable(_) => panic!("expected Cacheable value"),
        }
    }
}

/// Indicators of which level of cache freshness logic to force apply to an asset.
///
/// For example, should an existing fresh asset be revalidated or re-retrieved altogether.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ForcedFreshness {
    /// Indicates the asset should be considered stale and revalidated
    ForceExpired,

    /// Indicates the asset should be considered absent and treated like a miss
    /// instead of a hit
    ForceMiss,

    /// Indicates the asset should be considered fresh despite possibly being stale
    ForceFresh,
}

/// Freshness state of cache hit asset
///
///
#[derive(Debug, Copy, Clone, IntoStaticStr, PartialEq, Eq)]
#[strum(serialize_all = "snake_case")]
pub enum HitStatus {
    /// The asset's freshness directives indicate it has expired
    Expired,

    /// The asset was marked as expired, and should be treated as stale
    ForceExpired,

    /// The asset was marked as absent, and should be treated as a miss
    ForceMiss,

    /// An error occurred while processing the asset, so it should be treated as
    /// a miss
    FailedHitFilter,

    /// The asset is not expired
    Fresh,

    /// Asset exists but is expired, forced to be a hit
    ForceFresh,
}

impl HitStatus {
    /// For displaying cache hit status
    pub fn as_str(&self) -> &'static str {
        self.into()
    }

    /// Whether cached asset can be served as fresh
    pub fn is_fresh(&self) -> bool {
        *self == HitStatus::Fresh || *self == HitStatus::ForceFresh
    }

    /// Check whether the hit status should be treated as a miss. A forced miss
    /// is obviously treated as a miss. A hit-filter failure is treated as a
    /// miss because we can't use the asset as an actual hit. If we treat it as
    /// expired, we still might not be able to use it even if revalidation
    /// succeeds.
    pub fn is_treated_as_miss(self) -> bool {
        matches!(self, HitStatus::ForceMiss | HitStatus::FailedHitFilter)
    }
}

pub struct LockCtx {
    pub lock: Option<Locked>,
    pub cache_lock: &'static CacheKeyLockImpl,
    pub wait_timeout: Option<Duration>,
}

// Fields like storage handlers that are needed only when cache is enabled (or bypassing).
struct HttpCacheInnerEnabled {
    pub meta: Option<CacheMeta>,
    // when set, even if an asset exists, it would only be considered valid after this timestamp
    pub valid_after: Option<SystemTime>,
    pub miss_handler: Option<MissHandler>,
    pub body_reader: Option<HitHandler>,
    pub storage: &'static (dyn storage::Storage + Sync), // static for now
    pub eviction: Option<&'static (dyn eviction::EvictionManager + Sync)>,
    pub lock_ctx: Option<LockCtx>,
    pub traces: trace::CacheTraceCTX,
}

struct HttpCacheInner {
    // Prefer adding fields to InnerEnabled if possible, these fields are released
    // when cache is disabled.
    // If fields are needed after cache disablement, add directly to Inner.
    pub enabled_ctx: Option<Box<HttpCacheInnerEnabled>>,
    pub key: Option<CacheKey>,
    // when set, an asset will be rejected from the cache if it exceeds configured size in bytes
    pub max_file_size_tracker: Option<MaxFileSizeTracker>,
    pub predictor: Option<&'static (dyn predictor::CacheablePredictor + Sync)>,
}

#[derive(Debug, Default)]
#[non_exhaustive]
pub struct CacheOptionOverrides {
    pub wait_timeout: Option<Duration>,
}

impl HttpCache {
    /// Create a new [HttpCache].
    ///
    /// Caching is not enabled by default.
    pub fn new() -> Self {
        HttpCache {
            phase: CachePhase::Disabled(NoCacheReason::NeverEnabled),
            inner: None,
            digest: HttpCacheDigest::default(),
        }
    }

    /// Whether the cache is enabled
    pub fn enabled(&self) -> bool {
        !matches!(self.phase, CachePhase::Disabled(_) | CachePhase::Bypass)
    }

    /// Whether the cache is being bypassed
    pub fn bypassing(&self) -> bool {
        matches!(self.phase, CachePhase::Bypass)
    }

    /// Return the [CachePhase]
    pub fn phase(&self) -> CachePhase {
        self.phase
    }

    /// Whether anything was fetched from the upstream
    ///
    /// This essentially checks all possible [CachePhase] who need to contact the upstream server
    pub fn upstream_used(&self) -> bool {
        use CachePhase::*;
        match self.phase {
            Disabled(_) | Bypass | Miss | Expired | Revalidated | RevalidatedNoCache(_) => true,
            Hit | Stale | StaleUpdating => false,
            Uninit | CacheKey => false, // invalid states for this call, treat them as false to keep it simple
        }
    }

    /// Check whether the backend storage is the type `T`.
    pub fn storage_type_is<T: 'static>(&self) -> bool {
        self.inner
            .as_ref()
            .and_then(|inner| {
                inner
                    .enabled_ctx
                    .as_ref()
                    .and_then(|ie| ie.storage.as_any().downcast_ref::<T>())
            })
            .is_some()
    }

    /// Release the cache lock if the current request is a cache writer.
    ///
    /// Generally callers should prefer using `disable` when a cache lock should be released
    /// due to an error to clear all cache context. This function is for releasing the cache lock
    /// while still keeping the cache around for reading, e.g. when serving stale.
    pub fn release_write_lock(&mut self, reason: NoCacheReason) {
        use NoCacheReason::*;
        if let Some(inner) = self.inner.as_mut() {
            if let Some(lock_ctx) = inner
                .enabled_ctx
                .as_mut()
                .and_then(|ie| ie.lock_ctx.as_mut())
            {
                let lock = lock_ctx.lock.take();
                if let Some(Locked::Write(permit)) = lock {
                    let lock_status = match reason {
                        // let the next request try to fetch it
                        InternalError | StorageError | Deferred | UpstreamError => {
                            LockStatus::TransientError
                        }
                        // depends on why the proxy upstream filter declined the request,
                        // for now still allow next request try to acquire to avoid thundering herd
                        DeclinedToUpstream => LockStatus::TransientError,
                        // no need for the lock anymore
                        OriginNotCache | ResponseTooLarge | PredictedResponseTooLarge => {
                            LockStatus::GiveUp
                        }
                        Custom(reason) => lock_ctx.cache_lock.custom_lock_status(reason),
                        // should never happen, NeverEnabled shouldn't hold a lock
                        NeverEnabled => panic!("NeverEnabled holds a write lock"),
                        CacheLockGiveUp | CacheLockTimeout => {
                            panic!("CacheLock* are for cache lock readers only")
                        }
                    };
                    lock_ctx
                        .cache_lock
                        .release(inner.key.as_ref().unwrap(), permit, lock_status);
                }
            }
        }
    }

    /// Disable caching
    pub fn disable(&mut self, reason: NoCacheReason) {
        // XXX: compile type enforce?
        assert!(
            reason != NoCacheReason::NeverEnabled,
            "NeverEnabled not allowed as a disable reason"
        );
        match self.phase {
            CachePhase::Disabled(old_reason) => {
                // replace reason
                if old_reason == NoCacheReason::NeverEnabled {
                    // safeguard, don't allow replacing NeverEnabled as a reason
                    // TODO: can be promoted to assertion once confirmed nothing is attempting this
                    warn!("Tried to replace cache NeverEnabled with reason: {reason:?}");
                    return;
                }
                self.phase = CachePhase::Disabled(reason);
            }
            _ => {
                self.phase = CachePhase::Disabled(reason);
                self.release_write_lock(reason);
                // enabled_ctx will be cleared out
                let mut inner_enabled = self
                    .inner_mut()
                    .enabled_ctx
                    .take()
                    .expect("could remove enabled_ctx on disable");
                // log initial disable reason
                inner_enabled
                    .traces
                    .cache_span
                    .set_tag(|| trace::Tag::new("disable_reason", reason.as_str()));
            }
        }
    }

    /* The following methods panic when they are used in the wrong phase.
     * This is better than returning errors as such panics are only caused by coding error, which
     * should be fixed right away. Tokio runtime only crashes the current task instead of the whole
     * program when these panics happen. */

    /// Set the cache to bypass
    ///
    /// # Panic
    /// This call is only allowed in [CachePhase::CacheKey] phase (before any cache lookup is performed).
    /// Use it in any other phase will lead to panic.
    pub fn bypass(&mut self) {
        match self.phase {
            CachePhase::CacheKey => {
                // before cache lookup / found / miss
                self.phase = CachePhase::Bypass;
                self.inner_enabled_mut()
                    .traces
                    .cache_span
                    .set_tag(|| trace::Tag::new("bypassed", true));
            }
            _ => panic!("wrong phase to bypass HttpCache {:?}", self.phase),
        }
    }

    /// Enable the cache
    ///
    /// - `storage`: the cache storage backend that implements [storage::Storage]
    /// - `eviction`: optionally the eviction manager, without it, nothing will be evicted from the storage
    /// - `predictor`: optionally a cache predictor. The cache predictor predicts whether something is likely
    ///   to be cacheable or not. This is useful because the proxy can apply different types of optimization to
    ///   cacheable and uncacheable requests.
    /// - `cache_lock`: optionally a cache lock which handles concurrent lookups to the same asset. Without it
    ///   such lookups will all be allowed to fetch the asset independently.
    pub fn enable(
        &mut self,
        storage: &'static (dyn storage::Storage + Sync),
        eviction: Option<&'static (dyn eviction::EvictionManager + Sync)>,
        predictor: Option<&'static (dyn predictor::CacheablePredictor + Sync)>,
        cache_lock: Option<&'static CacheKeyLockImpl>,
        option_overrides: Option<CacheOptionOverrides>,
    ) {
        match self.phase {
            CachePhase::Disabled(_) => {
                self.phase = CachePhase::Uninit;

                let lock_ctx = cache_lock.map(|cache_lock| LockCtx {
                    cache_lock,
                    lock: None,
                    wait_timeout: option_overrides
                        .as_ref()
                        .and_then(|overrides| overrides.wait_timeout),
                });

                self.inner = Some(Box::new(HttpCacheInner {
                    enabled_ctx: Some(Box::new(HttpCacheInnerEnabled {
                        meta: None,
                        valid_after: None,
                        miss_handler: None,
                        body_reader: None,
                        storage,
                        eviction,
                        lock_ctx,
                        traces: CacheTraceCTX::new(),
                    })),
                    key: None,
                    max_file_size_tracker: None,
                    predictor,
                }));
            }
            _ => panic!("Cannot enable already enabled HttpCache {:?}", self.phase),
        }
    }

    /// Set the cache lock implementation.
    /// # Panic
    /// Must be called before a cache lock is attempted to be acquired,
    /// i.e. in the `cache_key_callback` or `cache_hit_filter` phases.
    pub fn set_cache_lock(
        &mut self,
        cache_lock: Option<&'static CacheKeyLockImpl>,
        option_overrides: Option<CacheOptionOverrides>,
    ) {
        match self.phase {
            CachePhase::Disabled(_)
            | CachePhase::CacheKey
            | CachePhase::Stale
            | CachePhase::Hit => {
                let inner_enabled = self.inner_enabled_mut();
                if inner_enabled
                    .lock_ctx
                    .as_ref()
                    .is_some_and(|ctx| ctx.lock.is_some())
                {
                    panic!("lock already set when resetting cache lock")
                } else {
                    let lock_ctx = cache_lock.map(|cache_lock| LockCtx {
                        cache_lock,
                        lock: None,
                        wait_timeout: option_overrides.and_then(|overrides| overrides.wait_timeout),
                    });
                    inner_enabled.lock_ctx = lock_ctx;
                }
            }
            _ => panic!("wrong phase: {:?}", self.phase),
        }
    }

    // Enable distributed tracing
    pub fn enable_tracing(&mut self, parent_span: trace::Span) {
        if let Some(inner_enabled) = self.inner.as_mut().and_then(|i| i.enabled_ctx.as_mut()) {
            inner_enabled.traces.enable(parent_span);
        }
    }

    // Get the cache parent tracing span
    pub fn get_cache_span(&self) -> Option<trace::SpanHandle> {
        self.inner
            .as_ref()
            .and_then(|i| i.enabled_ctx.as_ref().map(|ie| ie.traces.get_cache_span()))
    }

    // Get the cache `miss` tracing span
    pub fn get_miss_span(&self) -> Option<trace::SpanHandle> {
        self.inner
            .as_ref()
            .and_then(|i| i.enabled_ctx.as_ref().map(|ie| ie.traces.get_miss_span()))
    }

    // Get the cache `hit` tracing span
    pub fn get_hit_span(&self) -> Option<trace::SpanHandle> {
        self.inner
            .as_ref()
            .and_then(|i| i.enabled_ctx.as_ref().map(|ie| ie.traces.get_hit_span()))
    }

    // shortcut to access inner fields, panic if phase is disabled
    #[inline]
    fn inner_enabled_mut(&mut self) -> &mut HttpCacheInnerEnabled {
        self.inner.as_mut().unwrap().enabled_ctx.as_mut().unwrap()
    }

    #[inline]
    fn inner_enabled(&self) -> &HttpCacheInnerEnabled {
        self.inner.as_ref().unwrap().enabled_ctx.as_ref().unwrap()
    }

    // shortcut to access inner fields, panic if cache was never enabled
    #[inline]
    fn inner_mut(&mut self) -> &mut HttpCacheInner {
        self.inner.as_mut().unwrap()
    }

    #[inline]
    fn inner(&self) -> &HttpCacheInner {
        self.inner.as_ref().unwrap()
    }

    /// Set the cache key
    /// # Panic
    /// Cache key is only allowed to be set in its own phase. Set it in other phases will cause panic.
    pub fn set_cache_key(&mut self, key: CacheKey) {
        match self.phase {
            CachePhase::Uninit | CachePhase::CacheKey => {
                self.phase = CachePhase::CacheKey;
                self.inner_mut().key = Some(key);
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Return the cache key used for asset lookup
    /// # Panic
    /// Can only be called after the cache key is set and the cache is not disabled. Panic otherwise.
    pub fn cache_key(&self) -> &CacheKey {
        match self.phase {
            CachePhase::Disabled(NoCacheReason::NeverEnabled) | CachePhase::Uninit => {
                panic!("wrong phase {:?}", self.phase)
            }
            _ => self
                .inner()
                .key
                .as_ref()
                .expect("cache key should be set (set_cache_key not called?)"),
        }
    }

    /// Return the max size allowed to be cached.
    pub fn max_file_size_bytes(&self) -> Option<usize> {
        assert!(
            !matches!(
                self.phase,
                CachePhase::Disabled(NoCacheReason::NeverEnabled)
            ),
            "tried to access max file size bytes when cache never enabled"
        );
        self.inner()
            .max_file_size_tracker
            .as_ref()
            .map(|t| t.max_file_size_bytes())
    }

    /// Set the maximum response _body_ size in bytes that will be admitted to the cache.
    ///
    /// Response header size should not contribute to the max file size.
    ///
    /// To track body bytes, call `track_bytes_for_max_file_size`.
    pub fn set_max_file_size_bytes(&mut self, max_file_size_bytes: usize) {
        match self.phase {
            CachePhase::Disabled(_) => panic!("wrong phase {:?}", self.phase),
            _ => {
                self.inner_mut().max_file_size_tracker =
                    Some(MaxFileSizeTracker::new(max_file_size_bytes));
            }
        }
    }

    /// Record body bytes for the max file size tracker.
    ///
    /// The `bytes_len` input contributes to a cumulative body byte tracker.
    ///
    /// Once the cumulative body bytes exceeds the maximum allowable cache file size (as configured
    /// by `set_max_file_size_bytes`), then the return value will be false.
    ///
    /// Else the return value is true as long as the max file size is not exceeded.
    /// If max file size was not configured, the return value is always true.
    pub fn track_body_bytes_for_max_file_size(&mut self, bytes_len: usize) -> bool {
        // This is intended to be callable when cache has already been disabled,
        // so that we can re-mark an asset as cacheable if the body size is under limits.
        assert!(
            !matches!(
                self.phase,
                CachePhase::Disabled(NoCacheReason::NeverEnabled)
            ),
            "tried to access max file size bytes when cache never enabled"
        );
        self.inner_mut()
            .max_file_size_tracker
            .as_mut()
            .is_none_or(|t| t.add_body_bytes(bytes_len))
    }

    /// Check if the max file size has been exceeded according to max file size tracker.
    ///
    /// Return true if max file size was exceeded.
    pub fn exceeded_max_file_size(&self) -> bool {
        assert!(
            !matches!(
                self.phase,
                CachePhase::Disabled(NoCacheReason::NeverEnabled)
            ),
            "tried to access max file size bytes when cache never enabled"
        );
        self.inner()
            .max_file_size_tracker
            .as_ref()
            .is_some_and(|t| !t.allow_caching())
    }

    /// Set that cache is found in cache storage.
    ///
    /// This function is called after [Self::cache_lookup()] which returns the [CacheMeta] and
    /// [HitHandler].
    ///
    /// The `hit_status` enum allows the caller to force expire assets.
    pub fn cache_found(&mut self, meta: CacheMeta, hit_handler: HitHandler, hit_status: HitStatus) {
        // Stale allowed because of cache lock and then retry
        if !matches!(self.phase, CachePhase::CacheKey | CachePhase::Stale) {
            panic!("wrong phase {:?}", self.phase)
        }

        self.phase = match hit_status {
            HitStatus::Fresh | HitStatus::ForceFresh => CachePhase::Hit,
            HitStatus::Expired | HitStatus::ForceExpired => CachePhase::Stale,
            HitStatus::FailedHitFilter | HitStatus::ForceMiss => self.phase,
        };

        let phase = self.phase;
        let inner = self.inner_mut();

        let key = inner.key.as_ref().expect("key must be set on hit");
        let inner_enabled = inner
            .enabled_ctx
            .as_mut()
            .expect("cache_found must be called while cache enabled");

        // The cache lock might not be set for stale hit or hits treated as
        // misses, so we need to initialize it here
        let stale = phase == CachePhase::Stale;
        if stale || hit_status.is_treated_as_miss() {
            if let Some(lock_ctx) = inner_enabled.lock_ctx.as_mut() {
                lock_ctx.lock = Some(lock_ctx.cache_lock.lock(key, stale));
            }
        }

        if hit_status.is_treated_as_miss() {
            // Clear the body and meta for hits that are treated as misses
            inner_enabled.body_reader = None;
            inner_enabled.meta = None;
        } else {
            // Set the metadata appropriately for legit hits
            inner_enabled.traces.start_hit_span(phase, hit_status);
            inner_enabled.traces.log_meta_in_hit_span(&meta);
            if let Some(eviction) = inner_enabled.eviction {
                // TODO: make access() accept CacheKey
                let cache_key = key.to_compact();
                if hit_handler.should_count_access() {
                    let size = hit_handler.get_eviction_weight();
                    eviction.access(&cache_key, size, meta.0.internal.fresh_until);
                }
            }
            inner_enabled.meta = Some(meta);
            inner_enabled.body_reader = Some(hit_handler);
        }
    }

    /// Mark `self` to be cache miss.
    ///
    /// This function is called after [Self::cache_lookup()] finds nothing or the caller decides
    /// not to use the assets found.
    /// # Panic
    /// Panic in other phases.
    pub fn cache_miss(&mut self) {
        match self.phase {
            // from CacheKey: set state to miss during cache lookup
            // from Bypass: response became cacheable, set state to miss to cache
            // from Stale: waited for cache lock, then retried and found asset was gone
            CachePhase::CacheKey | CachePhase::Bypass | CachePhase::Stale => {
                self.phase = CachePhase::Miss;
                // It's possible that we've set the meta on lookup and have come back around
                // here after not being able to acquire the cache lock, and our item has since
                // purged or expired. We should be sure that the meta is not set in this case
                // as there shouldn't be a meta set for cache misses.
                self.inner_enabled_mut().meta = None;
                self.inner_enabled_mut().traces.start_miss_span();
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Return the [HitHandler]
    /// # Panic
    /// Call this after [Self::cache_found()], panic in other phases.
    pub fn hit_handler(&mut self) -> &mut HitHandler {
        match self.phase {
            CachePhase::Hit
            | CachePhase::Stale
            | CachePhase::StaleUpdating
            | CachePhase::Revalidated
            | CachePhase::RevalidatedNoCache(_) => {
                self.inner_enabled_mut().body_reader.as_mut().unwrap()
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Return the body reader during a cache admission (miss/expired) which decouples the downstream
    /// read and upstream cache write
    pub fn miss_body_reader(&mut self) -> Option<&mut HitHandler> {
        match self.phase {
            CachePhase::Miss | CachePhase::Expired => {
                let inner_enabled = self.inner_enabled_mut();
                if inner_enabled.storage.support_streaming_partial_write() {
                    inner_enabled.body_reader.as_mut()
                } else {
                    // body_reader could be set even when the storage doesn't support streaming
                    // Expired cache would have the reader set.
                    None
                }
            }
            _ => None,
        }
    }

    /// Return whether the underlying storage backend supports streaming partial write.
    ///
    /// Returns None if cache is not enabled.
    pub fn support_streaming_partial_write(&self) -> Option<bool> {
        self.inner.as_ref().and_then(|inner| {
            inner
                .enabled_ctx
                .as_ref()
                .map(|c| c.storage.support_streaming_partial_write())
        })
    }

    /// Call this when cache hit is fully read.
    ///
    /// This call will release resource if any and log the timing in tracing if set.
    /// # Panic
    /// Panic in phases where there is no cache hit.
    pub async fn finish_hit_handler(&mut self) -> Result<()> {
        match self.phase {
            CachePhase::Hit
            | CachePhase::Miss
            | CachePhase::Expired
            | CachePhase::Stale
            | CachePhase::StaleUpdating
            | CachePhase::Revalidated
            | CachePhase::RevalidatedNoCache(_) => {
                let inner = self.inner_mut();
                let inner_enabled = inner.enabled_ctx.as_mut().expect("cache enabled");
                if inner_enabled.body_reader.is_none() {
                    // already finished, we allow calling this function more than once
                    return Ok(());
                }
                let body_reader = inner_enabled.body_reader.take().unwrap();
                let key = inner.key.as_ref().unwrap();
                let result = body_reader
                    .finish(
                        inner_enabled.storage,
                        key,
                        &inner_enabled.traces.hit_span.handle(),
                    )
                    .await;
                inner_enabled.traces.finish_hit_span();
                result
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Set the [MissHandler] according to cache_key and meta, can only call once
    pub async fn set_miss_handler(&mut self) -> Result<()> {
        match self.phase {
            // set_miss_handler() needs to be called after set_cache_meta() (which change Stale to Expire).
            // This is an artificial rule to enforce the state transitions
            CachePhase::Miss | CachePhase::Expired => {
                let inner = self.inner_mut();
                let inner_enabled = inner
                    .enabled_ctx
                    .as_mut()
                    .expect("cache enabled on miss and expired");
                if inner_enabled.miss_handler.is_some() {
                    panic!("write handler is already set")
                }
                let meta = inner_enabled.meta.as_ref().unwrap();
                let key = inner.key.as_ref().unwrap();
                let miss_handler = inner_enabled
                    .storage
                    .get_miss_handler(key, meta, &inner_enabled.traces.get_miss_span())
                    .await?;

                inner_enabled.miss_handler = Some(miss_handler);

                if inner_enabled.storage.support_streaming_partial_write() {
                    // If a reader can access partial write, the cache lock can be released here
                    // to let readers start reading the body.
                    if let Some(lock_ctx) = inner_enabled.lock_ctx.as_mut() {
                        let lock = lock_ctx.lock.take();
                        if let Some(Locked::Write(permit)) = lock {
                            lock_ctx.cache_lock.release(key, permit, LockStatus::Done);
                        }
                    }
                    // Downstream read and upstream write can be decoupled
                    let body_reader = inner_enabled
                        .storage
                        .lookup_streaming_write(
                            key,
                            inner_enabled
                                .miss_handler
                                .as_ref()
                                .expect("miss handler already set")
                                .streaming_write_tag(),
                            &inner_enabled.traces.get_miss_span(),
                        )
                        .await?;

                    if let Some((_meta, body_reader)) = body_reader {
                        inner_enabled.body_reader = Some(body_reader);
                    } else {
                        // body_reader should exist now because streaming_partial_write is to support it
                        panic!("unable to get body_reader for {:?}", meta);
                    }
                }
                Ok(())
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Return the [MissHandler] to write the response body to cache.
    ///
    /// `None`: the handler has not been set or already finished
    pub fn miss_handler(&mut self) -> Option<&mut MissHandler> {
        match self.phase {
            CachePhase::Miss | CachePhase::Expired => {
                self.inner_enabled_mut().miss_handler.as_mut()
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Finish cache admission
    ///
    /// If [self] is dropped without calling this, the cache admission is considered incomplete and
    /// should be cleaned up.
    ///
    /// This call will also trigger eviction if set.
    pub async fn finish_miss_handler(&mut self) -> Result<()> {
        match self.phase {
            CachePhase::Miss | CachePhase::Expired => {
                let inner = self.inner_mut();
                let inner_enabled = inner
                    .enabled_ctx
                    .as_mut()
                    .expect("cache enabled on miss and expired");
                if inner_enabled.miss_handler.is_none() {
                    // already finished, we allow calling this function more than once
                    return Ok(());
                }
                let miss_handler = inner_enabled.miss_handler.take().unwrap();
                let size = miss_handler.finish().await?;
                let key = inner
                    .key
                    .as_ref()
                    .expect("key set by miss or expired phase");
                if let Some(lock_ctx) = inner_enabled.lock_ctx.as_mut() {
                    let lock = lock_ctx.lock.take();
                    if let Some(Locked::Write(permit)) = lock {
                        // no need to call r.unlock() because release() will call it
                        // r is a guard to make sure the lock is unlocked when this request is dropped
                        lock_ctx.cache_lock.release(key, permit, LockStatus::Done);
                    }
                }
                if let Some(eviction) = inner_enabled.eviction {
                    let cache_key = key.to_compact();
                    let meta = inner_enabled.meta.as_ref().unwrap();
                    let evicted = match size {
                        MissFinishType::Created(size) => {
                            eviction.admit(cache_key, size, meta.0.internal.fresh_until)
                        }
                        MissFinishType::Appended(size, max_size) => {
                            eviction.increment_weight(&cache_key, size, max_size)
                        }
                    };
                    // actual eviction can be done async
                    let span = inner_enabled.traces.child("eviction");
                    let handle = span.handle();
                    let storage = inner_enabled.storage;
                    tokio::task::spawn(async move {
                        for item in evicted {
                            if let Err(e) = storage.purge(&item, PurgeType::Eviction, &handle).await
                            {
                                warn!("Failed to purge {item} during eviction for finish miss handler: {e}");
                            }
                        }
                    });
                }
                inner_enabled.traces.finish_miss_span();
                Ok(())
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Set the [CacheMeta] of the cache
    pub fn set_cache_meta(&mut self, meta: CacheMeta) {
        match self.phase {
            // TODO: store the staled meta somewhere else for future use?
            CachePhase::Stale | CachePhase::Miss => {
                let inner_enabled = self.inner_enabled_mut();
                // TODO: have a separate expired span?
                inner_enabled.traces.log_meta_in_miss_span(&meta);
                inner_enabled.meta = Some(meta);
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
        if self.phase == CachePhase::Stale {
            self.phase = CachePhase::Expired;
        }
    }

    /// Set the [CacheMeta] of the cache after revalidation.
    ///
    /// Certain info such as the original cache admission time will be preserved. Others will
    /// be replaced by the input `meta`.
    pub async fn revalidate_cache_meta(&mut self, mut meta: CacheMeta) -> Result<bool> {
        let result = match self.phase {
            CachePhase::Stale => {
                let inner = self.inner_mut();
                let inner_enabled = inner
                    .enabled_ctx
                    .as_mut()
                    .expect("stale phase has cache enabled");
                // TODO: we should keep old meta in place, just use new one to update it
                // that requires cacheable_filter to take a mut header and just return InternalMeta

                // update new meta with old meta's created time
                let old_meta = inner_enabled.meta.take().unwrap();
                let created = old_meta.0.internal.created;
                meta.0.internal.created = created;
                // meta.internal.updated was already set to new meta's `created`,
                // no need to set `updated` here
                // Merge old extensions with new ones. New exts take precedence if they conflict.
                let mut extensions = old_meta.0.extensions;
                extensions.extend(meta.0.extensions);
                meta.0.extensions = extensions;

                inner_enabled.meta.replace(meta);

                let mut span = inner_enabled.traces.child("update_meta");
                let result = inner_enabled
                    .storage
                    .update_meta(
                        inner.key.as_ref().unwrap(),
                        inner_enabled.meta.as_ref().unwrap(),
                        &span.handle(),
                    )
                    .await;
                span.set_tag(|| trace::Tag::new("updated", result.is_ok()));

                // regardless of result, release the cache lock
                if let Some(lock_ctx) = inner_enabled.lock_ctx.as_mut() {
                    let lock = lock_ctx.lock.take();
                    if let Some(Locked::Write(permit)) = lock {
                        lock_ctx.cache_lock.release(
                            inner.key.as_ref().expect("key set by stale phase"),
                            permit,
                            LockStatus::Done,
                        );
                    }
                }

                result
            }
            _ => panic!("wrong phase {:?}", self.phase),
        };
        self.phase = CachePhase::Revalidated;
        result
    }

    /// After a successful revalidation, update certain headers for the cached asset
    /// such as `Etag` with the fresh response header `resp`.
    pub fn revalidate_merge_header(&mut self, resp: &RespHeader) -> ResponseHeader {
        match self.phase {
            CachePhase::Stale => {
                /*
                 * https://datatracker.ietf.org/doc/html/rfc9110#section-15.4.5
                 * 304 response MUST generate ... would have been sent in a 200 ...
                 * - Content-Location, Date, ETag, and Vary
                 * - Cache-Control and Expires...
                 */
                let mut old_header = self.inner_enabled().meta.as_ref().unwrap().0.header.clone();
                let mut clone_header = |header_name: &'static str| {
                    for (i, value) in resp.headers.get_all(header_name).iter().enumerate() {
                        if i == 0 {
                            old_header
                                .insert_header(header_name, value)
                                .expect("can add valid header");
                        } else {
                            old_header
                                .append_header(header_name, value)
                                .expect("can add valid header");
                        }
                    }
                };
                clone_header("cache-control");
                clone_header("expires");
                clone_header("cache-tag");
                clone_header("cdn-cache-control");
                clone_header("etag");
                // https://datatracker.ietf.org/doc/html/rfc9111#section-4.3.4
                // "...cache MUST update its header fields with the header fields provided in the 304..."
                // But if the Vary header changes, the cached response may no longer match the
                // incoming request.
                //
                // For simplicity, ignore changing Vary in revalidation for now.
                // TODO: if we support vary during revalidation, there are a few edge cases to
                // consider (what if Vary header appears/disappears/changes)?
                //
                // clone_header("vary");
                old_header
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Mark this asset uncacheable after revalidation
    pub fn revalidate_uncacheable(&mut self, header: ResponseHeader, reason: NoCacheReason) {
        match self.phase {
            CachePhase::Stale => {
                // replace cache meta header
                self.inner_enabled_mut().meta.as_mut().unwrap().0.header = header;
                // upstream request done, release write lock
                self.release_write_lock(reason);
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
        self.phase = CachePhase::RevalidatedNoCache(reason);
        // TODO: remove this asset from cache once finished?
    }

    /// Mark this asset as stale, but being updated separately from this request.
    pub fn set_stale_updating(&mut self) {
        match self.phase {
            CachePhase::Stale => self.phase = CachePhase::StaleUpdating,
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Update the variance of the [CacheMeta].
    ///
    /// Note that this process may change the lookup `key`, and eventually (when the asset is
    /// written to storage) invalidate other cached variants under the same primary key as the
    /// current asset.
    pub fn update_variance(&mut self, variance: Option<HashBinary>) {
        // If this is a cache miss, we will simply update the variance in the meta.
        //
        // If this is an expired response, we will have to consider a few cases:
        //
        // **Case 1**: Variance was absent, but caller sets it now.
        // We will just insert it into the meta. The current asset becomes the primary variant.
        // Because the current location of the asset is already the primary variant, nothing else
        // needs to be done.
        //
        // **Case 2**: Variance was present, but it changed or was removed.
        // We want the current asset to take over the primary slot, in order to invalidate all
        // other variants derived under the old Vary.
        //
        // **Case 3**: Variance did not change.
        // Nothing needs to happen.
        let inner = match self.phase {
            CachePhase::Miss | CachePhase::Expired => self.inner_mut(),
            _ => panic!("wrong phase {:?}", self.phase),
        };
        let inner_enabled = inner
            .enabled_ctx
            .as_mut()
            .expect("cache enabled on miss and expired");

        // Update the variance in the meta
        if let Some(variance_hash) = variance.as_ref() {
            inner_enabled
                .meta
                .as_mut()
                .unwrap()
                .set_variance_key(*variance_hash);
        } else {
            inner_enabled.meta.as_mut().unwrap().remove_variance();
        }

        // Change the lookup `key` if necessary, in order to admit asset into the primary slot
        // instead of the secondary slot.
        let key = inner.key.as_ref().unwrap();
        if let Some(old_variance) = key.get_variance_key().as_ref() {
            // This is a secondary variant slot.
            if Some(*old_variance) != variance.as_ref() {
                // This new variance does not match the variance in the cache key we used to look
                // up this asset.
                // Drop the cache lock to avoid leaving a dangling lock
                // (because we locked with the old cache key for the secondary slot)
                // TODO: maybe we should try to signal waiting readers to compete for the primary key
                // lock instead? we will not be modifying this secondary slot so it's not actually
                // ready for readers
                if let Some(lock_ctx) = inner_enabled.lock_ctx.as_mut() {
                    if let Some(Locked::Write(permit)) = lock_ctx.lock.take() {
                        lock_ctx.cache_lock.release(key, permit, LockStatus::Done);
                    }
                }
                // Remove the `variance` from the `key`, so that we admit this asset into the
                // primary slot. (`key` is used to tell storage where to write the data.)
                inner.key.as_mut().unwrap().remove_variance_key();
            }
        }
    }

    /// Return the [CacheMeta] of this asset
    ///
    /// # Panic
    /// Panic in phases which has no cache meta.
    pub fn cache_meta(&self) -> &CacheMeta {
        match self.phase {
            // TODO: allow in Bypass phase?
            CachePhase::Stale
            | CachePhase::StaleUpdating
            | CachePhase::Expired
            | CachePhase::Hit
            | CachePhase::Revalidated
            | CachePhase::RevalidatedNoCache(_) => self.inner_enabled().meta.as_ref().unwrap(),
            CachePhase::Miss => {
                // this is the async body read case, safe because body_reader is only set
                // after meta is retrieved
                if self.inner_enabled().body_reader.is_some() {
                    self.inner_enabled().meta.as_ref().unwrap()
                } else {
                    panic!("wrong phase {:?}", self.phase);
                }
            }

            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Return the [CacheMeta] of this asset if any
    ///
    /// Different from [Self::cache_meta()], this function is allowed to be called in
    /// [CachePhase::Miss] phase where the cache meta maybe set.
    /// # Panic
    /// Panic in phases that shouldn't have cache meta.
    pub fn maybe_cache_meta(&self) -> Option<&CacheMeta> {
        match self.phase {
            CachePhase::Miss
            | CachePhase::Stale
            | CachePhase::StaleUpdating
            | CachePhase::Expired
            | CachePhase::Hit
            | CachePhase::Revalidated
            | CachePhase::RevalidatedNoCache(_) => self.inner_enabled().meta.as_ref(),
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Return the [`CacheKey`] of this asset if any.
    ///
    /// This is allowed to be called in any phase. If the cache key callback was not called,
    /// this will return None.
    pub fn maybe_cache_key(&self) -> Option<&CacheKey> {
        (!matches!(
            self.phase(),
            CachePhase::Disabled(NoCacheReason::NeverEnabled) | CachePhase::Uninit
        ))
        .then(|| self.cache_key())
    }

    /// Perform the cache lookup from the given cache storage with the given cache key
    ///
    /// A cache hit will return [CacheMeta] which contains the header and meta info about
    /// the cache as well as a [HitHandler] to read the cache hit body.
    /// # Panic
    /// Panic in other phases.
    pub async fn cache_lookup(&mut self) -> Result<Option<(CacheMeta, HitHandler)>> {
        match self.phase {
            // Stale is allowed here because stale-> cache_lock -> lookup again
            CachePhase::CacheKey | CachePhase::Stale => {
                let inner = self
                    .inner
                    .as_mut()
                    .expect("Cache phase is checked and should have inner");
                let inner_enabled = inner
                    .enabled_ctx
                    .as_mut()
                    .expect("Cache enabled on cache_lookup");
                let mut span = inner_enabled.traces.child("lookup");
                let key = inner.key.as_ref().unwrap(); // safe, this phase should have cache key
                let now = Instant::now();
                let result = inner_enabled.storage.lookup(key, &span.handle()).await?;
                // one request may have multiple lookups
                self.digest.add_lookup_duration(now.elapsed());
                let result = result.and_then(|(meta, header)| {
                    if let Some(ts) = inner_enabled.valid_after {
                        if meta.created() < ts {
                            span.set_tag(|| trace::Tag::new("not valid", true));
                            return None;
                        }
                    }
                    Some((meta, header))
                });
                if result.is_none() {
                    if let Some(lock_ctx) = inner_enabled.lock_ctx.as_mut() {
                        lock_ctx.lock = Some(lock_ctx.cache_lock.lock(key, false));
                    }
                }
                span.set_tag(|| trace::Tag::new("found", result.is_some()));
                Ok(result)
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Update variance and see if the meta matches the current variance
    ///
    /// `cache_lookup() -> compute vary hash -> cache_vary_lookup()`
    /// This function allows callers to compute vary based on the initial cache hit.
    /// `meta` should be the ones returned from the initial cache_lookup()
    /// - return true if the meta is the variance.
    /// - return false if the current meta doesn't match the variance, need to cache_lookup() again
    pub fn cache_vary_lookup(&mut self, variance: HashBinary, meta: &CacheMeta) -> bool {
        match self.phase {
            // Stale is allowed here because stale-> cache_lock -> lookup again
            CachePhase::CacheKey | CachePhase::Stale => {
                let inner = self.inner_mut();
                // make sure that all variances found are fresher than this asset
                // this is because when purging all the variance, only the primary slot is deleted
                // the created TS of the primary is the tombstone of all the variances
                inner
                    .enabled_ctx
                    .as_mut()
                    .expect("cache enabled")
                    .valid_after = Some(meta.created());

                // update vary
                let key = inner.key.as_mut().unwrap();
                // if no variance was previously set, then this is the first cache hit
                let is_initial_cache_hit = key.get_variance_key().is_none();
                key.set_variance_key(variance);
                let variance_binary = key.variance_bin();
                let matches_variance = meta.variance() == variance_binary;

                // We should remove the variance in the lookup `key` if this is the primary variant
                // slot. We know this is the primary variant slot if this is the initial cache hit,
                // AND the variance in the `key` already matches the `meta`'s.
                //
                // For the primary variant slot, the storage backend needs to use the primary key
                // for both cache lookup and updating the meta. Otherwise it will look for the
                // asset in the wrong location during revalidation.
                //
                // We can recreate the "full" cache key by using the meta's variance, if needed.
                if matches_variance && is_initial_cache_hit {
                    inner.key.as_mut().unwrap().remove_variance_key();
                }

                matches_variance
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Whether this request is behind a cache lock in order to wait for another request to read the
    /// asset.
    pub fn is_cache_locked(&self) -> bool {
        matches!(
            self.inner_enabled()
                .lock_ctx
                .as_ref()
                .and_then(|l| l.lock.as_ref()),
            Some(Locked::Read(_))
        )
    }

    /// Whether this request is the leader request to fetch the assets for itself and other requests
    /// behind the cache lock.
    pub fn is_cache_lock_writer(&self) -> bool {
        matches!(
            self.inner_enabled()
                .lock_ctx
                .as_ref()
                .and_then(|l| l.lock.as_ref()),
            Some(Locked::Write(_))
        )
    }

    /// Take the write lock from this request to transfer it to another one.
    /// # Panic
    ///  Call is_cache_lock_writer() to check first, will panic otherwise.
    pub fn take_write_lock(&mut self) -> (WritePermit, &'static CacheKeyLockImpl) {
        let lock_ctx = self
            .inner_enabled_mut()
            .lock_ctx
            .as_mut()
            .expect("take_write_lock() called without cache lock");
        let lock = lock_ctx
            .lock
            .take()
            .expect("take_write_lock() called without lock");
        match lock {
            Locked::Write(w) => (w, lock_ctx.cache_lock),
            Locked::Read(_) => panic!("take_write_lock() called on read lock"),
        }
    }

    /// Set the write lock, which is usually transferred from [Self::take_write_lock()]
    ///
    /// # Panic
    /// Panics if cache lock was not originally configured for this request.
    // TODO: it may make sense to allow configuring the CacheKeyLock here too that the write permit
    // is associated with
    // (The WritePermit comes from the CacheKeyLock and should be used when releasing from the CacheKeyLock,
    // shouldn't be possible to give a WritePermit to a request using a different CacheKeyLock)
    pub fn set_write_lock(&mut self, write_lock: WritePermit) {
        if let Some(lock_ctx) = self.inner_enabled_mut().lock_ctx.as_mut() {
            lock_ctx.lock.replace(Locked::Write(write_lock));
        }
    }

    /// Whether this request's cache hit is staled
    fn has_staled_asset(&self) -> bool {
        matches!(self.phase, CachePhase::Stale | CachePhase::StaleUpdating)
    }

    /// Whether this asset is staled and stale if error is allowed
    pub fn can_serve_stale_error(&self) -> bool {
        self.has_staled_asset() && self.cache_meta().serve_stale_if_error(SystemTime::now())
    }

    /// Whether this asset is staled and stale while revalidate is allowed.
    pub fn can_serve_stale_updating(&self) -> bool {
        self.has_staled_asset()
            && self
                .cache_meta()
                .serve_stale_while_revalidate(SystemTime::now())
    }

    /// Wait for the cache read lock to be unlocked
    /// # Panic
    /// Check [Self::is_cache_locked()], panic if this request doesn't have a read lock.
    pub async fn cache_lock_wait(&mut self) -> LockStatus {
        let inner_enabled = self.inner_enabled_mut();
        let mut span = inner_enabled.traces.child("cache_lock");
        // should always call is_cache_locked() before this function, which should guarantee that
        // the inner cache has a read lock and lock ctx
        let (read_lock, status) = if let Some(lock_ctx) = inner_enabled.lock_ctx.as_mut() {
            let lock = lock_ctx.lock.take(); // remove the lock from self
            if let Some(Locked::Read(r)) = lock {
                let now = Instant::now();
                // it's possible for a request to be locked more than once,
                // so wait the remainder of our configured timeout
                let status = if let Some(wait_timeout) = lock_ctx.wait_timeout {
                    let wait_timeout =
                        wait_timeout.saturating_sub(self.lock_duration().unwrap_or(Duration::ZERO));
                    match timeout(wait_timeout, r.wait()).await {
                        Ok(()) => r.lock_status(),
                        Err(_) => LockStatus::WaitTimeout,
                    }
                } else {
                    r.wait().await;
                    r.lock_status()
                };
                self.digest.add_lock_duration(now.elapsed());
                (r, status)
            } else {
                panic!("cache_lock_wait on wrong type of lock")
            }
        } else {
            panic!("cache_lock_wait without cache lock")
        };
        if let Some(lock_ctx) = self.inner_enabled().lock_ctx.as_ref() {
            lock_ctx
                .cache_lock
                .trace_lock_wait(&mut span, &read_lock, status);
        }
        status
    }

    /// How long did this request wait behind the read lock
    pub fn lock_duration(&self) -> Option<Duration> {
        self.digest.lock_duration
    }

    /// How long did this request spent on cache lookup and reading the header
    pub fn lookup_duration(&self) -> Option<Duration> {
        self.digest.lookup_duration
    }

    /// Delete the asset from the cache storage
    /// # Panic
    /// Need to be called after the cache key is set. Panic otherwise.
    pub async fn purge(&self) -> Result<bool> {
        match self.phase {
            CachePhase::CacheKey => {
                let inner = self.inner();
                let inner_enabled = self.inner_enabled();
                let span = inner_enabled.traces.child("purge");
                let key = inner.key.as_ref().unwrap().to_compact();
                Self::purge_impl(inner_enabled.storage, inner_enabled.eviction, &key, span).await
            }
            _ => panic!("wrong phase {:?}", self.phase),
        }
    }

    /// Delete the asset from the cache storage via a spawned task.
    /// Returns corresponding `JoinHandle` of that task.
    /// # Panic
    /// Need to be called after the cache key is set. Panic otherwise.
    pub fn spawn_async_purge(
        &self,
        context: &'static str,
    ) -> tokio::task::JoinHandle<Result<bool>> {
        if matches!(self.phase, CachePhase::Disabled(_) | CachePhase::Uninit) {
            panic!("wrong phase {:?}", self.phase);
        }

        let inner_enabled = self.inner_enabled();
        let span = inner_enabled.traces.child("purge");
        let key = self.inner().key.as_ref().unwrap().to_compact();
        let storage = inner_enabled.storage;
        let eviction = inner_enabled.eviction;
        tokio::task::spawn(async move {
            Self::purge_impl(storage, eviction, &key, span)
                .await
                .map_err(|e| {
                    warn!("Failed to purge {key} (context: {context}): {e}");
                    e
                })
        })
    }

    async fn purge_impl(
        storage: &'static (dyn storage::Storage + Sync),
        eviction: Option<&'static (dyn eviction::EvictionManager + Sync)>,
        key: &CompactCacheKey,
        mut span: Span,
    ) -> Result<bool> {
        let result = storage
            .purge(key, PurgeType::Invalidation, &span.handle())
            .await;
        let purged = matches!(result, Ok(true));
        // need to inform eviction manager if asset was removed
        if let Some(eviction) = eviction.as_ref() {
            if purged {
                eviction.remove(key);
            }
        }
        span.set_tag(|| trace::Tag::new("purged", purged));
        result
    }

    /// Check the cacheable prediction
    ///
    /// Return true if the predictor is not set
    pub fn cacheable_prediction(&self) -> bool {
        if let Some(predictor) = self.inner().predictor {
            predictor.cacheable_prediction(self.cache_key())
        } else {
            true
        }
    }

    /// Tell the predictor that this response, which is previously predicted to be uncacheable,
    /// is cacheable now.
    pub fn response_became_cacheable(&self) {
        if let Some(predictor) = self.inner().predictor {
            predictor.mark_cacheable(self.cache_key());
        }
    }

    /// Tell the predictor that this response is uncacheable so that it will know next time
    /// this request arrives.
    pub fn response_became_uncacheable(&self, reason: NoCacheReason) {
        if let Some(predictor) = self.inner().predictor {
            predictor.mark_uncacheable(self.cache_key(), reason);
        }
    }

    /// Tag all spans as being part of a subrequest.
    pub fn tag_as_subrequest(&mut self) {
        self.inner_enabled_mut()
            .traces
            .cache_span
            .set_tag(|| Tag::new("is_subrequest", true))
    }
}


================================================
FILE: pingora-cache/src/lock.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Cache lock

use crate::{hashtable::ConcurrentHashTable, key::CacheHashKey, CacheKey};
use crate::{Span, Tag};

use http::Extensions;
use pingora_timeout::timeout;
use std::sync::Arc;
use std::time::Duration;

pub type CacheKeyLockImpl = dyn CacheKeyLock + Send + Sync;

pub trait CacheKeyLock {
    /// Try to lock a cache fetch
    ///
    /// If `stale_writer` is true, this fetch is to revalidate an asset already in cache.
    /// Else this fetch was a cache miss (i.e. not found via lookup, or force missed).
    ///
    /// Users should call after a cache miss before fetching the asset.
    /// The returned [Locked] will tell the caller either to fetch or wait.
    fn lock(&self, key: &CacheKey, stale_writer: bool) -> Locked;

    /// Release a lock for the given key
    ///
    /// When the write lock is dropped without being released, the read lock holders will consider
    /// it to be failed so that they will compete for the write lock again.
    fn release(&self, key: &CacheKey, permit: WritePermit, reason: LockStatus);

    /// Set tags on a trace span for the cache lock wait.
    fn trace_lock_wait(&self, span: &mut Span, _read_lock: &ReadLock, lock_status: LockStatus) {
        let tag_value: &'static str = lock_status.into();
        span.set_tag(|| Tag::new("status", tag_value));
    }

    /// Set a lock status for a custom `NoCacheReason`.
    fn custom_lock_status(&self, _custom_no_cache: &'static str) -> LockStatus {
        // treat custom no cache reasons as GiveUp by default
        // (like OriginNotCache)
        LockStatus::GiveUp
    }
}

const N_SHARDS: usize = 16;

/// The global cache locking manager
#[derive(Debug)]
pub struct CacheLock {
    lock_table: ConcurrentHashTable<LockStub, N_SHARDS>,
    // fixed lock timeout values for now
    age_timeout_default: Duration,
}

/// A struct representing locked cache access
#[derive(Debug)]
pub enum Locked {
    /// The writer is allowed to fetch the asset
    Write(WritePermit),
    /// The reader waits for the writer to fetch the asset
    Read(ReadLock),
}

impl Locked {
    /// Is this a write lock
    pub fn is_write(&self) -> bool {
        matches!(self, Self::Write(_))
    }
}

impl CacheLock {
    /// Create a new [CacheLock] with the given lock timeout
    ///
    /// Age timeout refers to how long a writer has been holding onto a particular lock, and wait
    /// timeout refers to how long a reader may hold onto any number of locks before giving up.
    /// When either timeout is reached, the read locks are automatically unlocked.
    pub fn new_boxed(age_timeout: Duration) -> Box<Self> {
        Box::new(CacheLock {
            lock_table: ConcurrentHashTable::new(),
            age_timeout_default: age_timeout,
        })
    }

    /// Create a new [CacheLock] with the given lock timeout
    ///
    /// Age timeout refers to how long a writer has been holding onto a particular lock, and wait
    /// timeout refers to how long a reader may hold onto any number of locks before giving up.
    /// When either timeout is reached, the read locks are automatically unlocked.
    pub fn new(age_timeout_default: Duration) -> Self {
        CacheLock {
            lock_table: ConcurrentHashTable::new(),
            age_timeout_default,
        }
    }
}

impl CacheKeyLock for CacheLock {
    fn lock(&self, key: &CacheKey, stale_writer: bool) -> Locked {
        let hash = key.combined_bin();
        let key = u128::from_be_bytes(hash); // endianness doesn't matter
        let table = self.lock_table.get(key);
        if let Some(lock) = table.read().get(&key) {
            // already has an ongoing request
            // If the lock status is dangling or timeout, the lock will _remain_ in the table
            // and readers should attempt to replace it.
            // In the case of writer timeout, any remaining readers that were waiting on THIS
            // LockCore should have (or are about to) timed out on their own.
            // Finding a Timeout status means that THIS writer's lock already expired, so future
            // requests ought to recreate the lock.
            if !matches!(
                lock.0.lock_status(),
                LockStatus::Dangling | LockStatus::AgeTimeout
            ) {
                return Locked::Read(lock.read_lock());
            }
            // Dangling: the previous writer quit without unlocking the lock. Requests should
            // compete for the write lock again.
        }

        let mut table = table.write();
        // check again in case another request already added it
        if let Some(lock) = table.get(&key) {
            if !matches!(
                lock.0.lock_status(),
                LockStatus::Dangling | LockStatus::AgeTimeout
            ) {
                return Locked::Read(lock.read_lock());
            }
        }
        let (permit, stub) =
            WritePermit::new(self.age_timeout_default, stale_writer, Extensions::new());
        table.insert(key, stub);
        Locked::Write(permit)
    }

    fn release(&self, key: &CacheKey, mut permit: WritePermit, reason: LockStatus) {
        let hash = key.combined_bin();
        let key = u128::from_be_bytes(hash); // endianness doesn't matter
        if permit.lock.lock_status() == LockStatus::AgeTimeout {
            // if lock age timed out, then readers are capable of
            // replacing the lock associated with this permit from the lock table
            // (see lock() implementation)
            // keep the lock status as Timeout accordingly when unlocking
            // (because we aren't removing it from the lock_table)
            permit.unlock(LockStatus::AgeTimeout);
        } else if let Some(_lock) = self.lock_table.write(key).remove(&key) {
            permit.unlock(reason);
        }
        // these situations above should capture all possible options,
        // else dangling cache lock may start
    }
}

use log::warn;
use std::sync::atomic::{AtomicU8, Ordering};
use std::time::Instant;
use strum::{FromRepr, IntoStaticStr};
use tokio::sync::Semaphore;

/// Status which the read locks could possibly see.
#[derive(Debug, Copy, Clone, PartialEq, Eq, IntoStaticStr, FromRepr)]
#[repr(u8)]
pub enum LockStatus {
    /// Waiting for the writer to populate the asset
    Waiting = 0,
    /// The writer finishes, readers can start
    Done = 1,
    /// The writer encountered error, such as network issue. A new writer will be elected.
    TransientError = 2,
    /// The writer observed that no cache lock is needed (e.g., uncacheable), readers should start
    /// to fetch independently without a new writer
    GiveUp = 3,
    /// The write lock is dropped without being unlocked
    Dangling = 4,
    /// Reader has held onto cache locks for too long, give up
    WaitTimeout = 5,
    /// The lock is held for too long by the writer
    AgeTimeout = 6,
}

impl From<LockStatus> for u8 {
    fn from(l: LockStatus) -> u8 {
        match l {
            LockStatus::Waiting => 0,
            LockStatus::Done => 1,
            LockStatus::TransientError => 2,
            LockStatus::GiveUp => 3,
            LockStatus::Dangling => 4,
            LockStatus::WaitTimeout => 5,
            LockStatus::AgeTimeout => 6,
        }
    }
}

impl From<u8> for LockStatus {
    fn from(v: u8) -> Self {
        Self::from_repr(v).unwrap_or(Self::GiveUp)
    }
}

#[derive(Debug)]
pub struct LockCore {
    pub lock_start: Instant,
    pub age_timeout: Duration,
    pub(super) lock: Semaphore,
    // use u8 for Atomic enum
    lock_status: AtomicU8,
    stale_writer: bool,
    extensions: Extensions,
}

impl LockCore {
    pub fn new_arc(timeout: Duration, stale_writer: bool, extensions: Extensions) -> Arc<Self> {
        Arc::new(LockCore {
            lock: Semaphore::new(0),
            age_timeout: timeout,
            lock_start: Instant::now(),
            lock_status: AtomicU8::new(LockStatus::Waiting.into()),
            stale_writer,
            extensions,
        })
    }

    pub fn locked(&self) -> bool {
        self.lock.available_permits() == 0
    }

    pub fn unlock(&self, reason: LockStatus) {
        assert!(
            reason != LockStatus::WaitTimeout,
            "WaitTimeout is not stored in LockCore"
        );
        self.lock_status.store(reason.into(), Ordering::SeqCst);
        // Any small positive number will do, 10 is used for RwLock as well.
        // No need to wake up all at once.
        self.lock.add_permits(10);
    }

    pub fn lock_status(&self) -> LockStatus {
        self.lock_status.load(Ordering::SeqCst).into()
    }

    /// Was this lock for a stale cache fetch writer?
    pub fn stale_writer(&self) -> bool {
        self.stale_writer
    }

    pub fn extensions(&self) -> &Extensions {
        &self.extensions
    }
}

// all 3 structs below are just Arc<LockCore> with different interfaces

/// ReadLock: the requests who get it need to wait until it is released
#[derive(Debug)]
pub struct ReadLock(Arc<LockCore>);

impl ReadLock {
    /// Wait for the writer to release the lock
    pub async fn wait(&self) {
        if !self.locked() {
            return;
        }

        // FIXME: for now it is the awkward responsibility of the ReadLock to set the
        // timeout status on the lock itself because the write permit cannot lock age
        // timeout on its own
        // TODO: need to be careful not to wake everyone up at the same time
        // (maybe not an issue because regular cache lock release behaves that way)
        if let Some(duration) = self.0.age_timeout.checked_sub(self.0.lock_start.elapsed()) {
            match timeout(duration, self.0.lock.acquire()).await {
                Ok(Ok(_)) => { // permit is returned to Semaphore right away
                }
                Ok(Err(e)) => {
                    warn!("error acquiring semaphore {e:?}")
                }
                Err(_) => {
                    self.0
                        .lock_status
                        .store(LockStatus::AgeTimeout.into(), Ordering::SeqCst);
                }
            }
        } else {
            // expiration has already occurred, store timeout status
            self.0
                .lock_status
                .store(LockStatus::AgeTimeout.into(), Ordering::SeqCst);
        }
    }

    /// Test if it is still locked
    pub fn locked(&self) -> bool {
        self.0.locked()
    }

    /// Whether the lock is expired, e.g., the writer has been holding the lock for too long
    pub fn expired(&self) -> bool {
        // NOTE: this is whether the lock is currently expired
        // not whether it was timed out during wait()
        self.0.lock_start.elapsed() >= self.0.age_timeout
    }

    /// The current status of the lock
    pub fn lock_status(&self) -> LockStatus {
        let status = self.0.lock_status();
        if matches!(status, LockStatus::Waiting) && self.expired() {
            LockStatus::AgeTimeout
        } else {
            status
        }
    }

    pub fn extensions(&self) -> &Extensions {
        self.0.extensions()
    }
}

/// WritePermit: requires who get it need to populate the cache and then release it
#[derive(Debug)]
pub struct WritePermit {
    lock: Arc<LockCore>,
    finished: bool,
}

impl WritePermit {
    /// Create a new lock, with a permit to be given to the associated writer.
    pub fn new(
        timeout: Duration,
        stale_writer: bool,
        extensions: Extensions,
    ) -> (WritePermit, LockStub) {
        let lock = LockCore::new_arc(timeout, stale_writer, extensions);
        let stub = LockStub(lock.clone());
        (
            WritePermit {
                lock,
                finished: false,
            },
            stub,
        )
    }

    /// Was this lock for a stale cache fetch writer?
    pub fn stale_writer(&self) -> bool {
        self.lock.stale_writer()
    }

    pub fn unlock(&mut self, reason: LockStatus) {
        self.finished = true;
        self.lock.unlock(reason);
    }

    pub fn lock_status(&self) -> LockStatus {
        self.lock.lock_status()
    }

    pub fn extensions(&self) -> &Extensions {
        self.lock.extensions()
    }
}

impl Drop for WritePermit {
    fn drop(&mut self) {
        // Writer exited without properly unlocking. We let others to compete for the write lock again
        if !self.finished {
            debug_assert!(false, "Dangling cache lock started!");
            self.unlock(LockStatus::Dangling);
        }
    }
}

#[derive(Debug)]
pub struct LockStub(pub Arc<LockCore>);
impl LockStub {
    pub fn read_lock(&self) -> ReadLock {
        ReadLock(self.0.clone())
    }

    pub fn extensions(&self) -> &Extensions {
        &self.0.extensions
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::CacheKey;

    #[test]
    fn test_get_release() {
        let cache_lock = CacheLock::new_boxed(Duration::from_secs(1000));
        let key1 = CacheKey::new("", "a", "1");
        let locked1 = cache_lock.lock(&key1, false);
        assert!(locked1.is_write()); // write permit
        let locked2 = cache_lock.lock(&key1, false);
        assert!(!locked2.is_write()); // read lock
        if let Locked::Write(permit) = locked1 {
            cache_lock.release(&key1, permit, LockStatus::Done);
        }
        let locked3 = cache_lock.lock(&key1, false);
        assert!(locked3.is_write()); // write permit again
        if let Locked::Write(permit) = locked3 {
            cache_lock.release(&key1, permit, LockStatus::Done);
        }
    }

    #[tokio::test]
    async fn test_lock() {
        let cache_lock = CacheLock::new_boxed(Duration::from_secs(1000));
        let key1 = CacheKey::new("", "a", "1");
        let mut permit = match cache_lock.lock(&key1, false) {
            Locked::Write(w) => w,
            _ => panic!(),
        };
        let lock = match cache_lock.lock(&key1, false) {
            Locked::Read(r) => r,
            _ => panic!(),
        };
        assert!(lock.locked());
        let handle = tokio::spawn(async move {
            lock.wait().await;
            assert_eq!(lock.lock_status(), LockStatus::Done);
        });
        permit.unlock(LockStatus::Done);
        handle.await.unwrap(); // check lock is unlocked and the task is returned
    }

    #[tokio::test]
    async fn test_lock_timeout() {
        let cache_lock = CacheLock::new_boxed(Duration::from_secs(1));
        let key1 = CacheKey::new("", "a", "1");
        let mut permit = match cache_lock.lock(&key1, false) {
            Locked::Write(w) => w,
            _ => panic!(),
        };
        let lock = match cache_lock.lock(&key1, false) {
            Locked::Read(r) => r,
            _ => panic!(),
        };
        assert!(lock.locked());

        let handle = tokio::spawn(async move {
            // timed out
            lock.wait().await;
            assert_eq!(lock.lock_status(), LockStatus::AgeTimeout);
        });

        tokio::time::sleep(Duration::from_millis(2100)).await;

        handle.await.unwrap(); // check lock is timed out

        // expired lock - we will be able to install a new lock instead
        let mut permit2 = match cache_lock.lock(&key1, false) {
            Locked::Write(w) => w,
            _ => panic!(),
        };
        let lock2 = match cache_lock.lock(&key1, false) {
            Locked::Read(r) => r,
            _ => panic!(),
        };
        assert!(lock2.locked());
        let handle = tokio::spawn(async move {
            // timed out
            lock2.wait().await;
            assert_eq!(lock2.lock_status(), LockStatus::Done);
        });

        permit.unlock(LockStatus::Done);
        permit2.unlock(LockStatus::Done);
        handle.await.unwrap();
    }

    #[tokio::test]
    async fn test_lock_expired_release() {
        let cache_lock = CacheLock::new_boxed(Duration::from_secs(1));
        let key1 = CacheKey::new("", "a", "1");
        let permit = match cache_lock.lock(&key1, false) {
            Locked::Write(w) => w,
            _ => panic!(),
        };

        let lock = match cache_lock.lock(&key1, false) {
            Locked::Read(r) => r,
            _ => panic!(),
        };
        assert!(lock.locked());
        let handle = tokio::spawn(async move {
            // timed out
            lock.wait().await;
            assert_eq!(lock.lock_status(), LockStatus::AgeTimeout);
        });

        tokio::time::sleep(Duration::from_millis(1100)).await; // let lock age time out
        handle.await.unwrap(); // check lock is timed out

        // writer finally finishes
        cache_lock.release(&key1, permit, LockStatus::Done);

        // can reacquire after release
        let mut permit = match cache_lock.lock(&key1, false) {
            Locked::Write(w) => w,
            _ => panic!(),
        };
        assert_eq!(permit.lock.lock_status(), LockStatus::Waiting);

        let lock2 = match cache_lock.lock(&key1, false) {
            Locked::Read(r) => r,
            _ => panic!(),
        };
        assert!(lock2.locked());
        let handle = tokio::spawn(async move {
            // timed out
            lock2.wait().await;
            assert_eq!(lock2.lock_status(), LockStatus::Done);
        });

        permit.unlock(LockStatus::Done);
        handle.await.unwrap();
    }

    #[tokio::test]
    async fn test_lock_expired_no_reader() {
        let cache_lock = CacheLock::new_boxed(Duration::from_secs(1));
        let key1 = CacheKey::new("", "a", "1");
        let mut permit = match cache_lock.lock(&key1, false) {
            Locked::Write(w) => w,
            _ => panic!(),
        };
        tokio::time::sleep(Duration::from_millis(1100)).await; // let lock age time out

        // lock expired without reader, but status is not yet set
        assert_eq!(permit.lock.lock_status(), LockStatus::Waiting);

        let lock = match cache_lock.lock(&key1, false) {
            Locked::Read(r) => r,
            _ => panic!(),
        };
        // reader expires write permit
        lock.wait().await;
        assert_eq!(lock.lock_status(), LockStatus::AgeTimeout);
        assert_eq!(permit.lock.lock_status(), LockStatus::AgeTimeout);
        permit.unlock(LockStatus::AgeTimeout);
    }

    #[tokio::test]
    async fn test_lock_concurrent() {
        let _ = env_logger::builder().is_test(true).try_init();
        // Test that concurrent attempts to compete for a lock run without issues
        let cache_lock = Arc::new(CacheLock::new_boxed(Duration::from_secs(1)));
        let key1 = CacheKey::new("", "a", "1");

        let mut handles = vec![];

        const READERS: usize = 30;
        for _ in 0..READERS {
            let key1 = key1.clone();
            let cache_lock = cache_lock.clone();
            // simulate a cache lookup / lock attempt loop
            handles.push(tokio::spawn(async move {
                // timed out
                loop {
                    match cache_lock.lock(&key1, false) {
                        Locked::Write(permit) => {
                            let _ = tokio::time::sleep(Duration::from_millis(5)).await;
                            cache_lock.release(&key1, permit, LockStatus::Done);
                            break;
                        }
                        Locked::Read(r) => {
                            r.wait().await;
                        }
                    }
                }
            }));
        }

        for handle in handles {
            handle.await.unwrap();
        }
    }
}


================================================
FILE: pingora-cache/src/max_file_size.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Set limit on the largest size to cache

use pingora_error::ErrorType;

/// Error type returned when the limit is reached.
pub const ERR_RESPONSE_TOO_LARGE: ErrorType = ErrorType::Custom("response too large");

// Body bytes tracker to adjust (predicted) cacheability,
// even if cache has been disabled.
#[derive(Debug)]
pub(crate) struct MaxFileSizeTracker {
    body_bytes: usize,
    max_size: usize,
}

impl MaxFileSizeTracker {
    // Create a new Tracker object.
    pub fn new(max_size: usize) -> MaxFileSizeTracker {
        MaxFileSizeTracker {
            body_bytes: 0,
            max_size,
        }
    }

    // Add bytes to the tracker.
    // If return value is true, the tracker bytes are under the max size allowed.
    pub fn add_body_bytes(&mut self, bytes: usize) -> bool {
        self.body_bytes += bytes;
        self.allow_caching()
    }

    pub fn max_file_size_bytes(&self) -> usize {
        self.max_size
    }

    pub fn allow_caching(&self) -> bool {
        self.body_bytes <= self.max_size
    }
}


================================================
FILE: pingora-cache/src/memory.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Hash map based in memory cache
//!
//! For testing only, not for production use

//TODO: Mark this module #[test] only

use super::*;
use crate::key::CompactCacheKey;
use crate::storage::{streaming_write::U64WriteId, HandleHit, HandleMiss};
use crate::trace::SpanHandle;

use async_trait::async_trait;
use bytes::Bytes;
use parking_lot::RwLock;
use pingora_error::*;
use std::any::Any;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use tokio::sync::watch;

type BinaryMeta = (Vec<u8>, Vec<u8>);

pub(crate) struct CacheObject {
    pub meta: BinaryMeta,
    pub body: Arc<Vec<u8>>,
}

pub(crate) struct TempObject {
    pub meta: BinaryMeta,
    // these are Arc because they need to continue to exist after this TempObject is removed
    pub body: Arc<RwLock<Vec<u8>>>,
    bytes_written: Arc<watch::Sender<PartialState>>, // this should match body.len()
}

impl TempObject {
    fn new(meta: BinaryMeta) -> Self {
        let (tx, _rx) = watch::channel(PartialState::Partial(0));
        TempObject {
            meta,
            body: Arc::new(RwLock::new(Vec::new())),
            bytes_written: Arc::new(tx),
        }
    }
    // this is not at all optimized
    fn make_cache_object(&self) -> CacheObject {
        let meta = self.meta.clone();
        let body = Arc::new(self.body.read().clone());
        CacheObject { meta, body }
    }
}

/// Hash map based in memory cache
///
/// For testing only, not for production use.
pub struct MemCache {
    pub(crate) cached: Arc<RwLock<HashMap<String, CacheObject>>>,
    pub(crate) temp: Arc<RwLock<HashMap<String, HashMap<u64, TempObject>>>>,
    pub(crate) last_temp_id: AtomicU64,
}

impl MemCache {
    /// Create a new [MemCache]
    pub fn new() -> Self {
        MemCache {
            cached: Arc::new(RwLock::new(HashMap::new())),
            temp: Arc::new(RwLock::new(HashMap::new())),
            last_temp_id: AtomicU64::new(0),
        }
    }
}

pub enum MemHitHandler {
    Complete(CompleteHit),
    Partial(PartialHit),
}

#[derive(Copy, Clone)]
enum PartialState {
    Partial(usize),
    Complete(usize),
}

pub struct CompleteHit {
    body: Arc<Vec<u8>>,
    done: bool,
    range_start: usize,
    range_end: usize,
}

impl CompleteHit {
    fn get(&mut self) -> Option<Bytes> {
        if self.done {
            None
        } else {
            self.done = true;
            Some(Bytes::copy_from_slice(
                &self.body.as_slice()[self.range_start..self.range_end],
            ))
        }
    }

    fn seek(&mut self, start: usize, end: Option<usize>) -> Result<()> {
        if start >= self.body.len() {
            return Error::e_explain(
                ErrorType::InternalError,
                format!("seek start out of range {start} >= {}", self.body.len()),
            );
        }
        self.range_start = start;
        if let Some(end) = end {
            // end over the actual last byte is allowed, we just need to return the actual bytes
            self.range_end = std::cmp::min(self.body.len(), end);
        }
        // seek resets read so that one handler can be used for multiple ranges
        self.done = false;
        Ok(())
    }
}

pub struct PartialHit {
    body: Arc<RwLock<Vec<u8>>>,
    bytes_written: watch::Receiver<PartialState>,
    bytes_read: usize,
}

impl PartialHit {
    async fn read(&mut self) -> Option<Bytes> {
        loop {
            let bytes_written = *self.bytes_written.borrow_and_update();
            let bytes_end = match bytes_written {
                PartialState::Partial(s) => s,
                PartialState::Complete(c) => {
                    // no more data will arrive
                    if c == self.bytes_read {
                        return None;
                    }
                    c
                }
            };
            assert!(bytes_end >= self.bytes_read);

            // more data available to read
            if bytes_end > self.bytes_read {
                let new_bytes =
                    Bytes::copy_from_slice(&self.body.read()[self.bytes_read..bytes_end]);
                self.bytes_read = bytes_end;
                return Some(new_bytes);
            }

            // wait for more data
            if self.bytes_written.changed().await.is_err() {
                // err: sender dropped, body is finished
                // FIXME: sender could drop because of an error
                return None;
            }
        }
    }
}

#[async_trait]
impl HandleHit for MemHitHandler {
    async fn read_body(&mut self) -> Result<Option<Bytes>> {
        match self {
            Self::Complete(c) => Ok(c.get()),
            Self::Partial(p) => Ok(p.read().await),
        }
    }
    async fn finish(
        self: Box<Self>, // because self is always used as a trait object
        _storage: &'static (dyn storage::Storage + Sync),
        _key: &CacheKey,
        _trace: &SpanHandle,
    ) -> Result<()> {
        Ok(())
    }

    fn can_seek(&self) -> bool {
        match self {
            Self::Complete(_) => true,
            Self::Partial(_) => false, // TODO: support seeking in partial reads
        }
    }

    fn seek(&mut self, start: usize, end: Option<usize>) -> Result<()> {
        match self {
            Self::Complete(c) => c.seek(start, end),
            Self::Partial(_) => Error::e_explain(
                ErrorType::InternalError,
                "seek not supported for partial cache",
            ),
        }
    }

    fn should_count_access(&self) -> bool {
        match self {
            // avoid counting accesses for partial reads to keep things simple
            Self::Complete(_) => true,
            Self::Partial(_) => false,
        }
    }

    fn get_eviction_weight(&self) -> usize {
        match self {
            // FIXME: just body size, also track meta size
            Self::Complete(c) => c.body.len(),
            // partial read cannot be estimated since body size is unknown
            Self::Partial(_) => 0,
        }
    }

    fn as_any(&self) -> &(dyn Any + Send + Sync) {
        self
    }

    fn as_any_mut(&mut self) -> &mut (dyn Any + Send + Sync) {
        self
    }
}

pub struct MemMissHandler {
    body: Arc<RwLock<Vec<u8>>>,
    bytes_written: Arc<watch::Sender<PartialState>>,
    // these are used only in finish() to data from temp to cache
    key: String,
    temp_id: U64WriteId,
    // key -> cache object
    cache: Arc<RwLock<HashMap<String, CacheObject>>>,
    // key -> (temp writer id -> temp object) to support concurrent writers
    temp: Arc<RwLock<HashMap<String, HashMap<u64, TempObject>>>>,
}

#[async_trait]
impl HandleMiss for MemMissHandler {
    async fn write_body(&mut self, data: bytes::Bytes, eof: bool) -> Result<()> {
        let current_bytes = match *self.bytes_written.borrow() {
            PartialState::Partial(p) => p,
            PartialState::Complete(_) => panic!("already EOF"),
        };
        self.body.write().extend_from_slice(&data);
        let written = current_bytes + data.len();
        let new_state = if eof {
            PartialState::Complete(written)
        } else {
            PartialState::Partial(written)
        };
        self.bytes_written.send_replace(new_state);
        Ok(())
    }

    async fn finish(self: Box<Self>) -> Result<MissFinishType> {
        // safe, the temp object is inserted when the miss handler is created
        let cache_object = self
            .temp
            .read()
            .get(&self.key)
            .unwrap()
            .get(&self.temp_id.into())
            .unwrap()
            .make_cache_object();
        let size = cache_object.body.len(); // FIXME: this just body size, also track meta size
        self.cache.write().insert(self.key.clone(), cache_object);
        self.temp
            .write()
            .get_mut(&self.key)
            .and_then(|map| map.remove(&self.temp_id.into()));
        Ok(MissFinishType::Created(size))
    }

    fn streaming_write_tag(&self) -> Option<&[u8]> {
        Some(self.temp_id.as_bytes())
    }
}

impl Drop for MemMissHandler {
    fn drop(&mut self) {
        self.temp
            .write()
            .get_mut(&self.key)
            .and_then(|map| map.remove(&self.temp_id.into()));
    }
}

fn hit_from_temp_obj(temp_obj: &TempObject) -> Result<Option<(CacheMeta, HitHandler)>> {
    let meta = CacheMeta::deserialize(&temp_obj.meta.0, &temp_obj.meta.1)?;
    let partial = PartialHit {
        body: temp_obj.body.clone(),
        bytes_written: temp_obj.bytes_written.subscribe(),
        bytes_read: 0,
    };
    let hit_handler = MemHitHandler::Partial(partial);
    Ok(Some((meta, Box::new(hit_handler))))
}

#[async_trait]
impl Storage for MemCache {
    async fn lookup(
        &'static self,
        key: &CacheKey,
        _trace: &SpanHandle,
    ) -> Result<Option<(CacheMeta, HitHandler)>> {
        let hash = key.combined();
        // always prefer partial read otherwise fresh asset will not be visible on expired asset
        // until it is fully updated
        // no preference on which partial read we get (if there are multiple writers)
        if let Some((_, temp_obj)) = self
            .temp
            .read()
            .get(&hash)
            .and_then(|map| map.iter().next())
        {
            hit_from_temp_obj(temp_obj)
        } else if let Some(obj) = self.cached.read().get(&hash) {
            let meta = CacheMeta::deserialize(&obj.meta.0, &obj.meta.1)?;
            let hit_handler = CompleteHit {
                body: obj.body.clone(),
                done: false,
                range_start: 0,
                range_end: obj.body.len(),
            };
            let hit_handler = MemHitHandler::Complete(hit_handler);
            Ok(Some((meta, Box::new(hit_handler))))
        } else {
            Ok(None)
        }
    }

    async fn lookup_streaming_write(
        &'static self,
        key: &CacheKey,
        streaming_write_tag: Option<&[u8]>,
        _trace: &SpanHandle,
    ) -> Result<Option<(CacheMeta, HitHandler)>> {
        let hash = key.combined();
        let write_tag: U64WriteId = streaming_write_tag
            .expect("tag must be set during streaming write")
            .try_into()
            .expect("tag must be correct length");
        hit_from_temp_obj(
            self.temp
                .read()
                .get(&hash)
                .and_then(|map| map.get(&write_tag.into()))
                .expect("must have partial write in progress"),
        )
    }

    async fn get_miss_handler(
        &'static self,
        key: &CacheKey,
        meta: &CacheMeta,
        _trace: &SpanHandle,
    ) -> Result<MissHandler> {
        let hash = key.combined();
        let meta = meta.serialize()?;
        let temp_obj = TempObject::new(meta);
        let temp_id = self.last_temp_id.fetch_add(1, Ordering::Relaxed);
        let miss_handler = MemMissHandler {
            body: temp_obj.body.clone(),
            bytes_written: temp_obj.bytes_written.clone(),
            key: hash.clone(),
            cache: self.cached.clone(),
            temp: self.temp.clone(),
            temp_id: temp_id.into(),
        };
        self.temp
            .write()
            .entry(hash)
            .or_default()
            .insert(miss_handler.temp_id.into(), temp_obj);
        Ok(Box::new(miss_handler))
    }

    async fn purge(
        &'static self,
        key: &CompactCacheKey,
        _type: PurgeType,
        _trace: &SpanHandle,
    ) -> Result<bool> {
        // This usually purges the primary key because, without a lookup, the variance key is usually
        // empty
        let hash = key.combined();
        let temp_removed = self.temp.write().remove(&hash).is_some();
        let cache_removed = self.cached.write().remove(&hash).is_some();
        Ok(temp_removed || cache_removed)
    }

    async fn update_meta(
        &'static self,
        key: &CacheKey,
        meta: &CacheMeta,
        _trace: &SpanHandle,
    ) -> Result<bool> {
        let hash = key.combined();
        if let Some(obj) = self.cached.write().get_mut(&hash) {
            obj.meta = meta.serialize()?;
            Ok(true)
        } else {
            panic!("no meta found")
        }
    }

    fn support_streaming_partial_write(&self) -> bool {
        true
    }

    fn as_any(&self) -> &(dyn Any + Send + Sync) {
        self
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use cf_rustracing::span::Span;
    use once_cell::sync::Lazy;

    fn gen_meta() -> CacheMeta {
        let mut header = ResponseHeader::build(200, None).unwrap();
        header.append_header("foo1", "bar1").unwrap();
        header.append_header("foo2", "bar2").unwrap();
        header.append_header("foo3", "bar3").unwrap();
        header.append_header("Server", "Pingora").unwrap();
        let internal = crate::meta::InternalMeta::default();
        CacheMeta(Box::new(crate::meta::CacheMetaInner {
            internal,
            header,
            extensions: http::Extensions::new(),
        }))
    }

    #[tokio::test]
    async fn test_write_then_read() {
        static MEM_CACHE: Lazy<MemCache> = Lazy::new(MemCache::new);
        let span = &Span::inactive().handle();

        let key1 = CacheKey::new("", "a", "1");
        let res = MEM_CACHE.lookup(&key1, span).await.unwrap();
        assert!(res.is_none());

        let cache_meta = gen_meta();

        let mut miss_handler = MEM_CACHE
            .get_miss_handler(&key1, &cache_meta, span)
            .await
            .unwrap();
        miss_handler
            .write_body(b"test1"[..].into(), false)
            .await
            .unwrap();
        miss_handler
            .write_body(b"test2"[..].into(), false)
            .await
            .unwrap();
        miss_handler.finish().await.unwrap();

        let (cache_meta2, mut hit_handler) = MEM_CACHE.lookup(&key1, span).await.unwrap().unwrap();
        assert_eq!(
            cache_meta.0.internal.fresh_until,
            cache_meta2.0.internal.fresh_until
        );

        let data = hit_handler.read_body().await.unwrap().unwrap();
        assert_eq!("test1test2", data);
        let data = hit_handler.read_body().await.unwrap();
        assert!(data.is_none());
    }

    #[tokio::test]
    async fn test_read_range() {
        static MEM_CACHE: Lazy<MemCache> = Lazy::new(MemCache::new);
        let span = &Span::inactive().handle();

        let key1 = CacheKey::new("", "a", "1");
        let res = MEM_CACHE.lookup(&key1, span).await.unwrap();
        assert!(res.is_none());

        let cache_meta = gen_meta();

        let mut miss_handler = MEM_CACHE
            .get_miss_handler(&key1, &cache_meta, span)
            .await
            .unwrap();
        miss_handler
            .write_body(b"test1test2"[..].into(), false)
            .await
            .unwrap();
        miss_handler.finish().await.unwrap();

        let (cache_meta2, mut hit_handler) = MEM_CACHE.lookup(&key1, span).await.unwrap().unwrap();
        assert_eq!(
            cache_meta.0.internal.fresh_until,
            cache_meta2.0.internal.fresh_until
        );

        // out of range
        assert!(hit_handler.seek(10000, None).is_err());

        assert!(hit_handler.seek(5, None).is_ok());
        let data = hit_handler.read_body().await.unwrap().unwrap();
        assert_eq!("test2", data);
        let data = hit_handler.read_body().await.unwrap();
        assert!(data.is_none());

        assert!(hit_handler.seek(4, Some(5)).is_ok());
        let data = hit_handler.read_body().await.unwrap().unwrap();
        assert_eq!("1", data);
        let data = hit_handler.read_body().await.unwrap();
        assert!(data.is_none());
    }

    #[tokio::test]
    async fn test_write_while_read() {
        use futures::FutureExt;

        static MEM_CACHE: Lazy<MemCache> = Lazy::new(MemCache::new);
        let span = &Span::inactive().handle();

        let key1 = CacheKey::new("", "a", "1");
        let res = MEM_CACHE.lookup(&key1, span).await.unwrap();
        assert!(res.is_none());

        let cache_meta = gen_meta();

        let mut miss_handler = MEM_CACHE
            .get_miss_handler(&key1, &cache_meta, span)
            .await
            .unwrap();

        // first reader
        let (cache_meta1, mut hit_handler1) = MEM_CACHE.lookup(&key1, span).await.unwrap().unwrap();
        assert_eq!(
            cache_meta.0.internal.fresh_until,
            cache_meta1.0.internal.fresh_until
        );

        // No body to read
        let res = hit_handler1.read_body().now_or_never();
        assert!(res.is_none());

        miss_handler
            .write_body(b"test1"[..].into(), false)
            .await
            .unwrap();

        let data = hit_handler1.read_body().await.unwrap().unwrap();
        assert_eq!("test1", data);
        let res = hit_handler1.read_body().now_or_never();
        assert!(res.is_none());

        miss_handler
            .write_body(b"test2"[..].into(), false)
            .await
            .unwrap();
        let data = hit_handler1.read_body().await.unwrap().unwrap();
        assert_eq!("test2", data);

        // second reader
        let (cache_meta2, mut hit_handler2) = MEM_CACHE.lookup(&key1, span).await.unwrap().unwrap();
        assert_eq!(
            cache_meta.0.internal.fresh_until,
            cache_meta2.0.internal.fresh_until
        );

        let data = hit_handler2.read_body().await.unwrap().unwrap();
        assert_eq!("test1test2", data);
        let res = hit_handler2.read_body().now_or_never();
        assert!(res.is_none());

        let res = hit_handler1.read_body().now_or_never();
        assert!(res.is_none());

        miss_handler.finish().await.unwrap();

        let data = hit_handler1.read_body().await.unwrap();
        assert!(data.is_none());
        let data = hit_handler2.read_body().await.unwrap();
        assert!(data.is_none());
    }

    #[tokio::test]
    async fn test_purge_partial() {
        static MEM_CACHE: Lazy<MemCache> = Lazy::new(MemCache::new);
        let cache = &MEM_CACHE;

        let key = CacheKey::new("", "a", "1").to_compact();
        let hash = key.combined();
        let meta = (
            "meta_key".as_bytes().to_vec(),
            "meta_value".as_bytes().to_vec(),
        );

        let temp_obj = TempObject::new(meta);
        let mut map = HashMap::new();
        map.insert(0, temp_obj);
        cache.temp.write().insert(hash.clone(), map);

        assert!(cache.temp.read().contains_key(&hash));

        let result = cache
            .purge(&key, PurgeType::Invalidation, &Span::inactive().handle())
            .await;
        assert!(result.is_ok());

        assert!(!cache.temp.read().contains_key(&hash));
    }

    #[tokio::test]
    async fn test_purge_complete() {
        static MEM_CACHE: Lazy<MemCache> = Lazy::new(MemCache::new);
        let cache = &MEM_CACHE;

        let key = CacheKey::new("", "a", "1").to_compact();
        let hash = key.combined();
        let meta = (
            "meta_key".as_bytes().to_vec(),
            "meta_value".as_bytes().to_vec(),
        );
        let body = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
        let cache_obj = CacheObject {
            meta,
            body: Arc::new(body),
        };
        cache.cached.write().insert(hash.clone(), cache_obj);

        assert!(cache.cached.read().contains_key(&hash));

        let result = cache
            .purge(&key, PurgeType::Invalidation, &Span::inactive().handle())
            .await;
        assert!(result.is_ok());

        assert!(!cache.cached.read().contains_key(&hash));
    }
}


================================================
FILE: pingora-cache/src/meta.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Metadata for caching

pub use http::Extensions;
use log::warn;
use once_cell::sync::{Lazy, OnceCell};
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use pingora_header_serde::HeaderSerde;
use pingora_http::{HMap, ResponseHeader};
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::time::{Duration, SystemTime};

use crate::key::HashBinary;

pub(crate) type InternalMeta = internal_meta::InternalMetaLatest;
mod internal_meta {
    use super::*;

    pub(crate) type InternalMetaLatest = InternalMetaV2;

    #[derive(Debug, Deserialize, Serialize, Clone)]
    pub(crate) struct InternalMetaV0 {
        pub(crate) fresh_until: SystemTime,
        pub(crate) created: SystemTime,
        pub(crate) stale_while_revalidate_sec: u32,
        pub(crate) stale_if_error_sec: u32,
        // Do not add more field
    }

    impl InternalMetaV0 {
        #[allow(dead_code)]
        fn serialize(&self) -> Result<Vec<u8>> {
            rmp_serde::encode::to_vec(self).or_err(InternalError, "failed to encode cache meta")
        }

        fn deserialize(buf: &[u8]) -> Result<Self> {
            rmp_serde::decode::from_slice(buf)
                .or_err(InternalError, "failed to decode cache meta v0")
        }
    }

    #[derive(Debug, Deserialize, Serialize, Clone)]
    pub(crate) struct InternalMetaV1 {
        pub(crate) version: u8,
        pub(crate) fresh_until: SystemTime,
        pub(crate) created: SystemTime,
        pub(crate) stale_while_revalidate_sec: u32,
        pub(crate) stale_if_error_sec: u32,
        // Do not add more field
    }

    impl InternalMetaV1 {
        #[allow(dead_code)]
        pub const VERSION: u8 = 1;

        #[allow(dead_code)]
        pub fn serialize(&self) -> Result<Vec<u8>> {
            assert_eq!(self.version, 1);
            rmp_serde::encode::to_vec(self).or_err(InternalError, "failed to encode cache meta")
        }

        fn deserialize(buf: &[u8]) -> Result<Self> {
            rmp_serde::decode::from_slice(buf)
                .or_err(InternalError, "failed to decode cache meta v1")
        }
    }

    #[derive(Debug, Deserialize, Serialize, Clone)]
    pub(crate) struct InternalMetaV2 {
        pub(crate) version: u8,
        pub(crate) fresh_until: SystemTime,
        pub(crate) created: SystemTime,
        pub(crate) updated: SystemTime,
        pub(crate) stale_while_revalidate_sec: u32,
        pub(crate) stale_if_error_sec: u32,
        // Only the extended field to be added below. One field at a time.
        // 1. serde default in order to accept an older version schema without the field existing
        // 2. serde skip_serializing_if in order for software with only an older version of this
        //    schema to decode it
        // After full releases, remove `skip_serializing_if` so that we can add the next extended field.
        #[serde(default)]
        pub(crate) variance: Option<HashBinary>,
        #[serde(default)]
        #[serde(skip_serializing_if = "Option::is_none")]
        pub(crate) epoch_override: Option<SystemTime>,
    }

    impl Default for InternalMetaV2 {
        fn default() -> Self {
            let epoch = SystemTime::UNIX_EPOCH;
            InternalMetaV2 {
                version: InternalMetaV2::VERSION,
                fresh_until: epoch,
                created: epoch,
                updated: epoch,
                stale_while_revalidate_sec: 0,
                stale_if_error_sec: 0,
                variance: None,
                epoch_override: None,
            }
        }
    }

    impl InternalMetaV2 {
        pub const VERSION: u8 = 2;

        pub fn serialize(&self) -> Result<Vec<u8>> {
            assert_eq!(self.version, Self::VERSION);
            rmp_serde::encode::to_vec(self).or_err(InternalError, "failed to encode cache meta")
        }

        fn deserialize(buf: &[u8]) -> Result<Self> {
            rmp_serde::decode::from_slice(buf)
                .or_err(InternalError, "failed to decode cache meta v2")
        }
    }

    impl From<InternalMetaV0> for InternalMetaV2 {
        fn from(v0: InternalMetaV0) -> Self {
            InternalMetaV2 {
                version: InternalMetaV2::VERSION,
                fresh_until: v0.fresh_until,
                created: v0.created,
                updated: v0.created,
                stale_while_revalidate_sec: v0.stale_while_revalidate_sec,
                stale_if_error_sec: v0.stale_if_error_sec,
                ..Default::default()
            }
        }
    }

    impl From<InternalMetaV1> for InternalMetaV2 {
        fn from(v1: InternalMetaV1) -> Self {
            InternalMetaV2 {
                version: InternalMetaV2::VERSION,
                fresh_until: v1.fresh_until,
                created: v1.created,
                updated: v1.created,
                stale_while_revalidate_sec: v1.stale_while_revalidate_sec,
                stale_if_error_sec: v1.stale_if_error_sec,
                ..Default::default()
            }
        }
    }

    // cross version decode
    pub(crate) fn deserialize(buf: &[u8]) -> Result<InternalMetaLatest> {
        const MIN_SIZE: usize = 10; // a small number to read the first few bytes
        if buf.len() < MIN_SIZE {
            return Error::e_explain(
                InternalError,
                format!("Buf too short ({}) to be InternalMeta", buf.len()),
            );
        }
        let preread_buf = &mut &buf[..MIN_SIZE];
        // the struct is always packed as a fixed size array
        match rmp::decode::read_array_len(preread_buf)
            .or_err(InternalError, "failed to decode cache meta array size")?
        {
            // v0 has 4 items and no version number
            4 => Ok(InternalMetaV0::deserialize(buf)?.into()),
            // other V should have version number encoded
            _ => {
                // rmp will encode `version` < 128 into a fixint (one byte),
                // so we use read_pfix
                let version = rmp::decode::read_pfix(preread_buf)
                    .or_err(InternalError, "failed to decode meta version")?;
                match version {
                    1 => Ok(InternalMetaV1::deserialize(buf)?.into()),
                    2 => InternalMetaV2::deserialize(buf),
                    _ => Error::e_explain(
                        InternalError,
                        format!("Unknown InternalMeta version {version}"),
                    ),
                }
            }
        }
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_internal_meta_serde_v0() {
            let meta = InternalMetaV0 {
                fresh_until: SystemTime::now(),
                created: SystemTime::now(),
                stale_while_revalidate_sec: 0,
                stale_if_error_sec: 0,
            };
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV0::deserialize(&binary).unwrap();
            assert_eq!(meta.fresh_until, meta2.fresh_until);
        }

        #[test]
        fn test_internal_meta_serde_v1() {
            let meta = InternalMetaV1 {
                version: InternalMetaV1::VERSION,
                fresh_until: SystemTime::now(),
                created: SystemTime::now(),
                stale_while_revalidate_sec: 0,
                stale_if_error_sec: 0,
            };
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV1::deserialize(&binary).unwrap();
            assert_eq!(meta.fresh_until, meta2.fresh_until);
        }

        #[test]
        fn test_internal_meta_serde_v2() {
            let meta = InternalMetaV2::default();
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV2::deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);
            assert_eq!(meta.created, meta2.created);
            assert_eq!(meta.updated, meta2.updated);
        }

        #[test]
        fn test_internal_meta_serde_across_versions() {
            let meta = InternalMetaV0 {
                fresh_until: SystemTime::now(),
                created: SystemTime::now(),
                stale_while_revalidate_sec: 0,
                stale_if_error_sec: 0,
            };
            let binary = meta.serialize().unwrap();
            let meta2 = deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);

            let meta = InternalMetaV1 {
                version: 1,
                fresh_until: SystemTime::now(),
                created: SystemTime::now(),
                stale_while_revalidate_sec: 0,
                stale_if_error_sec: 0,
            };
            let binary = meta.serialize().unwrap();
            let meta2 = deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);
            // `updated` == `created` when upgrading to v2
            assert_eq!(meta2.created, meta2.updated);
        }

        // make sure that v2 format is backward compatible
        // this is the base version of v2 without any extended fields
        #[derive(Deserialize, Serialize)]
        struct InternalMetaV2Base {
            version: u8,
            fresh_until: SystemTime,
            created: SystemTime,
            updated: SystemTime,
            stale_while_revalidate_sec: u32,
            stale_if_error_sec: u32,
        }

        impl InternalMetaV2Base {
            pub const VERSION: u8 = 2;
            pub fn serialize(&self) -> Result<Vec<u8>> {
                assert!(self.version >= Self::VERSION);
                rmp_serde::encode::to_vec(self).or_err(InternalError, "failed to encode cache meta")
            }
            fn deserialize(buf: &[u8]) -> Result<Self> {
                rmp_serde::decode::from_slice(buf)
                    .or_err(InternalError, "failed to decode cache meta v2")
            }
        }

        // this is the base version of v2 with variance but without epoch_override
        #[derive(Deserialize, Serialize)]
        struct InternalMetaV2BaseWithVariance {
            version: u8,
            fresh_until: SystemTime,
            created: SystemTime,
            updated: SystemTime,
            stale_while_revalidate_sec: u32,
            stale_if_error_sec: u32,
            #[serde(default)]
            #[serde(skip_serializing_if = "Option::is_none")]
            variance: Option<HashBinary>,
        }

        impl Default for InternalMetaV2BaseWithVariance {
            fn default() -> Self {
                let epoch = SystemTime::UNIX_EPOCH;
                InternalMetaV2BaseWithVariance {
                    version: InternalMetaV2::VERSION,
                    fresh_until: epoch,
                    created: epoch,
                    updated: epoch,
                    stale_while_revalidate_sec: 0,
                    stale_if_error_sec: 0,
                    variance: None,
                }
            }
        }

        impl InternalMetaV2BaseWithVariance {
            pub const VERSION: u8 = 2;
            pub fn serialize(&self) -> Result<Vec<u8>> {
                assert!(self.version >= Self::VERSION);
                rmp_serde::encode::to_vec(self).or_err(InternalError, "failed to encode cache meta")
            }
            fn deserialize(buf: &[u8]) -> Result<Self> {
                rmp_serde::decode::from_slice(buf)
                    .or_err(InternalError, "failed to decode cache meta v2")
            }
        }

        #[test]
        fn test_internal_meta_serde_v2_extend_fields_variance() {
            // ext V2 to base v2
            let meta = InternalMetaV2BaseWithVariance::default();
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV2Base::deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);
            assert_eq!(meta.created, meta2.created);
            assert_eq!(meta.updated, meta2.updated);

            // base V2 to ext v2
            let now = SystemTime::now();
            let meta = InternalMetaV2Base {
                version: InternalMetaV2::VERSION,
                fresh_until: now,
                created: now,
                updated: now,
                stale_while_revalidate_sec: 0,
                stale_if_error_sec: 0,
            };
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV2BaseWithVariance::deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);
            assert_eq!(meta.created, meta2.created);
            assert_eq!(meta.updated, meta2.updated);
        }

        #[test]
        fn test_internal_meta_serde_v2_extend_fields_epoch_override() {
            let now = SystemTime::now();

            // ext V2 (with epoch_override = None) to V2 with variance (without epoch_override field)
            let meta = InternalMetaV2 {
                fresh_until: now,
                created: now,
                updated: now,
                epoch_override: None, // None means it will be skipped during serialization
                ..Default::default()
            };
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV2BaseWithVariance::deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);
            assert_eq!(meta.created, meta2.created);
            assert_eq!(meta.updated, meta2.updated);
            assert!(meta2.variance.is_none());

            // V2 base with variance (without epoch_override) to ext V2 (with epoch_override)
            let mut meta = InternalMetaV2BaseWithVariance {
                version: InternalMetaV2::VERSION,
                fresh_until: now,
                created: now,
                updated: now,
                stale_while_revalidate_sec: 0,
                stale_if_error_sec: 0,
                variance: None,
            };
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV2::deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);
            assert_eq!(meta.created, meta2.created);
            assert_eq!(meta.updated, meta2.updated);
            assert!(meta2.variance.is_none());
            assert!(meta2.epoch_override.is_none());

            // try with variance set
            meta.variance = Some(*b"variance_testing");
            let binary = meta.serialize().unwrap();
            let meta2 = InternalMetaV2::deserialize(&binary).unwrap();
            assert_eq!(meta2.version, 2);
            assert_eq!(meta.fresh_until, meta2.fresh_until);
            assert_eq!(meta.created, meta2.created);
            assert_eq!(meta.updated, meta2.updated);
            assert_eq!(meta.variance, meta2.variance);
            assert!(meta2.epoch_override.is_none());
        }
    }
}

#[derive(Debug)]
pub(crate) struct CacheMetaInner {
    // http header and Internal meta have different ways of serialization, so keep them separated
    pub(crate) internal: InternalMeta,
    pub(crate) header: ResponseHeader,
    /// An opaque type map to hold extra information for communication between cache backends
    /// and users. This field is **not** guaranteed be persistently stored in the cache backend.
    pub extensions: Extensions,
}

/// The cacheable response header and cache metadata
#[derive(Debug)]
pub struct CacheMeta(pub(crate) Box<CacheMetaInner>);

impl CacheMeta {
    /// Create a [CacheMeta] from the given metadata and the response header
    pub fn new(
        fresh_until: SystemTime,
        created: SystemTime,
        stale_while_revalidate_sec: u32,
        stale_if_error_sec: u32,
        header: ResponseHeader,
    ) -> CacheMeta {
        CacheMeta(Box::new(CacheMetaInner {
            internal: InternalMeta {
                version: InternalMeta::VERSION,
                fresh_until,
                created,
                updated: created, // created == updated for new meta
                stale_while_revalidate_sec,
                stale_if_error_sec,
                ..Default::default()
            },
            header,
            extensions: Extensions::new(),
        }))
    }

    /// When the asset was created/admitted to cache
    pub fn created(&self) -> SystemTime {
        self.0.internal.created
    }

    /// The last time the asset was revalidated
    ///
    /// This value will be the same as [Self::created()] if no revalidation ever happens
    pub fn updated(&self) -> SystemTime {
        self.0.internal.updated
    }

    /// The reference point for cache age. This represents the "starting point" for `fresh_until`.
    ///
    /// This defaults to the `updated` timestamp but is overridden by the `epoch_override` field
    /// if set.
    pub fn epoch(&self) -> SystemTime {
        self.0.internal.epoch_override.unwrap_or(self.updated())
    }

    /// Get the epoch override for this asset
    pub fn epoch_override(&self) -> Option<SystemTime> {
        self.0.internal.epoch_override
    }

    /// Set the epoch override for this asset
    ///
    /// When set, this will be used as the reference point for calculating age and freshness
    /// instead of the updated time.
    pub fn set_epoch_override(&mut self, epoch: SystemTime) {
        self.0.internal.epoch_override = Some(epoch);
    }

    /// Remove the epoch override for this asset
    pub fn remove_epoch_override(&mut self) {
        self.0.internal.epoch_override = None;
    }

    /// Is the asset still valid
    pub fn is_fresh(&self, time: SystemTime) -> bool {
        // NOTE: HTTP cache time resolution is second
        self.0.internal.fresh_until >= time
    }

    /// How long (in seconds) the asset should be fresh since its admission/revalidation
    ///
    /// This is essentially the max-age value (or its equivalence).
    /// If an epoch override is set, it will be used as the reference point instead of the updated time.
    pub fn fresh_sec(&self) -> u64 {
        // swallow `duration_since` error, assets that are always stale have earlier `fresh_until` than `created`
        // practically speaking we can always treat these as 0 ttl
        // XXX: return Error if `fresh_until` is much earlier than expected?
        let reference = self.epoch();
        self.0
            .internal
            .fresh_until
            .duration_since(reference)
            .map_or(0, |duration| duration.as_secs())
    }

    /// Until when the asset is considered fresh
    pub fn fresh_until(&self) -> SystemTime {
        self.0.internal.fresh_until
    }

    /// How old the asset is since its admission/revalidation
    ///
    /// If an epoch override is set, it will be used as the reference point instead of the updated time.
    pub fn age(&self) -> Duration {
        let reference = self.epoch();
        SystemTime::now()
            .duration_since(reference)
            .unwrap_or_default()
    }

    /// The stale-while-revalidate limit in seconds
    pub fn stale_while_revalidate_sec(&self) -> u32 {
        self.0.internal.stale_while_revalidate_sec
    }

    /// The stale-if-error limit in seconds
    pub fn stale_if_error_sec(&self) -> u32 {
        self.0.internal.stale_if_error_sec
    }

    /// Can the asset be used to serve stale during revalidation at the given time.
    ///
    /// NOTE: the serve stale functions do not check !is_fresh(time),
    /// i.e. the object is already assumed to be stale.
    pub fn serve_stale_while_revalidate(&self, time: SystemTime) -> bool {
        self.can_serve_stale(self.0.internal.stale_while_revalidate_sec, time)
    }

    /// Can the asset be used to serve stale after error at the given time.
    ///
    /// NOTE: the serve stale functions do not check !is_fresh(time),
    /// i.e. the object is already assumed to be stale.
    pub fn serve_stale_if_error(&self, time: SystemTime) -> bool {
        self.can_serve_stale(self.0.internal.stale_if_error_sec, time)
    }

    /// Disable serve stale for this asset
    pub fn disable_serve_stale(&mut self) {
        self.0.internal.stale_if_error_sec = 0;
        self.0.internal.stale_while_revalidate_sec = 0;
    }

    /// Get the variance hash of this asset
    pub fn variance(&self) -> Option<HashBinary> {
        self.0.internal.variance
    }

    /// Set the variance key of this asset
    pub fn set_variance_key(&mut self, variance_key: HashBinary) {
        self.0.internal.variance = Some(variance_key);
    }

    /// Set the variance (hash) of this asset
    pub fn set_variance(&mut self, variance: HashBinary) {
        self.0.internal.variance = Some(variance)
    }

    /// Removes the variance (hash) of this asset
    pub fn remove_variance(&mut self) {
        self.0.internal.variance = None
    }

    /// Get the response header in this asset
    pub fn response_header(&self) -> &ResponseHeader {
        &self.0.header
    }

    /// Modify the header in this asset
    pub fn response_header_mut(&mut self) -> &mut ResponseHeader {
        &mut self.0.header
    }

    /// Expose the extensions to read
    pub fn extensions(&self) -> &Extensions {
        &self.0.extensions
    }

    /// Expose the extensions to modify
    pub fn extensions_mut(&mut self) -> &mut Extensions {
        &mut self.0.extensions
    }

    /// Get a copy of the response header
    pub fn response_header_copy(&self) -> ResponseHeader {
        self.0.header.clone()
    }

    /// get all the headers of this asset
    pub fn headers(&self) -> &HMap {
        &self.0.header.headers
    }

    fn can_serve_stale(&self, serve_stale_sec: u32, time: SystemTime) -> bool {
        if serve_stale_sec == 0 {
            return false;
        }
        if let Some(stale_until) = self
            .0
            .internal
            .fresh_until
            .checked_add(Duration::from_secs(serve_stale_sec.into()))
        {
            stale_until >= time
        } else {
            // overflowed: treat as infinite ttl
            true
        }
    }

    /// Serialize this object
    pub fn serialize(&self) -> Result<(Vec<u8>, Vec<u8>)> {
        let internal = self.0.internal.serialize()?;
        let header = header_serialize(&self.0.header)?;
        log::debug!("header to serialize: {:?}", &self.0.header);
        Ok((internal, header))
    }

    /// Deserialize from the binary format
    pub fn deserialize(internal: &[u8], header: &[u8]) -> Result<Self> {
        let internal = internal_meta::deserialize(internal)?;
        let header = header_deserialize(header)?;
        Ok(CacheMeta(Box::new(CacheMetaInner {
            internal,
            header,
            extensions: Extensions::new(),
        })))
    }
}

use http::StatusCode;

/// The function to generate TTL from the given [StatusCode].
pub type FreshDurationByStatusFn = fn(StatusCode) -> Option<Duration>;

/// The default settings to generate [CacheMeta]
pub struct CacheMetaDefaults {
    // if a status code is not included in fresh_sec, it's not considered cacheable by default.
    fresh_sec_fn: FreshDurationByStatusFn,
    stale_while_revalidate_sec: u32,
    // TODO: allow "error" condition to be configurable?
    stale_if_error_sec: u32,
}

impl CacheMetaDefaults {
    /// Create a new [CacheMetaDefaults]
    pub const fn new(
        fresh_sec_fn: FreshDurationByStatusFn,
        stale_while_revalidate_sec: u32,
        stale_if_error_sec: u32,
    ) -> Self {
        CacheMetaDefaults {
            fresh_sec_fn,
            stale_while_revalidate_sec,
            stale_if_error_sec,
        }
    }

    /// Return the default TTL for the given [StatusCode]
    ///
    /// `None`: do no cache this code.
    pub fn fresh_sec(&self, resp_status: StatusCode) -> Option<Duration> {
        // safe guard to make sure 304 response to share the same default ttl of 200
        if resp_status == StatusCode::NOT_MODIFIED {
            (self.fresh_sec_fn)(StatusCode::OK)
        } else {
            (self.fresh_sec_fn)(resp_status)
        }
    }

    /// The default SWR seconds
    pub fn serve_stale_while_revalidate_sec(&self) -> u32 {
        self.stale_while_revalidate_sec
    }

    /// The default SIE seconds
    pub fn serve_stale_if_error_sec(&self) -> u32 {
        self.stale_if_error_sec
    }
}

/// The dictionary content for header compression.
///
/// Used during initialization of [`HEADER_SERDE`].
static COMPRESSION_DICT_CONTENT: OnceCell<Cow<'static, [u8]>> = OnceCell::new();

static HEADER_SERDE: Lazy<HeaderSerde> = Lazy::new(|| {
    let dict_opt = if let Some(dict_content) = COMPRESSION_DICT_CONTENT.get() {
        Some(dict_content.to_vec())
    } else {
        warn!("no header compression dictionary loaded - use set_compression_dict_content() or set_compression_dict_path() to set one");
        None
    };

    HeaderSerde::new(dict_opt)
});

pub(crate) fn header_serialize(header: &ResponseHeader) -> Result<Vec<u8>> {
    HEADER_SERDE.serialize(header)
}

pub(crate) fn header_deserialize<T: AsRef<[u8]>>(buf: T) -> Result<ResponseHeader> {
    HEADER_SERDE.deserialize(buf.as_ref())
}

/// Load the header compression dictionary from a file, which helps serialize http header.
///
/// Returns false if it is already set or if the file could not be read.
///
/// Use [`set_compression_dict_content`] to set the dictionary from memory instead.
pub fn set_compression_dict_path(path: &str) -> bool {
    match std::fs::read(path) {
        Ok(dict) => COMPRESSION_DICT_CONTENT.set(dict.into()).is_ok(),
        Err(e) => {
            warn!(
                "failed to read header compress dictionary file at {}, {:?}",
                path, e
            );
            false
        }
    }
}

/// Set the header compression dictionary content, which helps serialize http header.
///
/// Returns false if it is already set.
///
/// This is an alernative to [`set_compression_dict_path`], allowing use of
/// a dictionary without an external file.
pub fn set_compression_dict_content(content: Cow<'static, [u8]>) -> bool {
    COMPRESSION_DICT_CONTENT.set(content).is_ok()
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;

    #[test]
    fn test_cache_meta_age_without_override() {
        let now = SystemTime::now();
        let header = ResponseHeader::build_no_case(200, None).unwrap();
        let meta = CacheMeta::new(now + Duration::from_secs(300), now, 0, 0, header);

        // Without epoch_override, age() should use updated() as reference
        std::thread::sleep(Duration::from_millis(100));
        let age = meta.age();
        assert!(age.as_secs() < 1, "age should be close to 0");

        // epoch() should return updated() when no override is set
        assert_eq!(meta.epoch(), meta.updated());
    }

    #[test]
    fn test_cache_meta_age_with_epoch_override_past() {
        let now = SystemTime::now();
        let header = ResponseHeader::build(200, None).unwrap();
        let mut meta = CacheMeta::new(now + Duration::from_secs(300), now, 0, 0, header);

        // Set epoch_override to 10 seconds in the past
        let epoch_override = now - Duration::from_secs(10);
        meta.set_epoch_override(epoch_override);

        // age() should now use epoch_override as the reference
        let age = meta.age();
        assert!(age.as_secs() >= 10);
        assert!(age.as_secs() < 12);

        // epoch() should return the override
        assert_eq!(meta.epoch(), epoch_override);
        assert_eq!(meta.epoch_override(), Some(epoch_override));
    }

    #[test]
    fn test_cache_meta_age_with_epoch_override_future() {
        let now = SystemTime::now();
        let header = ResponseHeader::build(200, None).unwrap();
        let mut meta = CacheMeta::new(now + Duration::from_secs(100), now, 0, 0, header);

        // Set epoch_override to a future time
        let future_epoch = now + Duration::from_secs(10);
        meta.set_epoch_override(future_epoch);

        let age_with_epoch = meta.age();
        // age should be 0 since epoch_override is in the future
        assert_eq!(age_with_epoch, Duration::ZERO);
    }

    #[test]
    fn test_cache_meta_fresh_sec() {
        let header = ResponseHeader::build(StatusCode::OK, None).unwrap();
        let mut meta = CacheMeta::new(
            SystemTime::now() + Duration::from_secs(100),
            SystemTime::now() - Duration::from_secs(100),
            0,
            0,
            header,
        );

        meta.0.internal.updated = SystemTime::UNIX_EPOCH + Duration::from_secs(1000);
        meta.0.internal.fresh_until = SystemTime::UNIX_EPOCH + Duration::from_secs(1100);

        // Without epoch_override, fresh_sec should use updated as reference
        let fresh_sec_without_override = meta.fresh_sec();
        assert_eq!(fresh_sec_without_override, 100); // 1100 - 1000 = 100 seconds

        // With epoch_override set to a later time (1050), fresh_sec should be calculated from that reference
        let epoch_override = SystemTime::UNIX_EPOCH + Duration::from_secs(1050);
        meta.set_epoch_override(epoch_override);
        assert_eq!(meta.epoch_override(), Some(epoch_override));
        assert_eq!(meta.epoch(), epoch_override);

        let fresh_sec_with_override = meta.fresh_sec();
        // fresh_until - epoch_override = 1100 - 1050 = 50 seconds
        assert_eq!(fresh_sec_with_override, 50);

        meta.remove_epoch_override();
        assert_eq!(meta.epoch_override(), None);
        assert_eq!(meta.epoch(), meta.updated());
        assert_eq!(meta.fresh_sec(), 100); // back to normal calculation
    }
}


================================================
FILE: pingora-cache/src/predictor.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Cacheability Predictor

use crate::hashtable::{ConcurrentLruCache, LruShard};

pub type CustomReasonPredicate = fn(&'static str) -> bool;

/// Cacheability Predictor
///
/// Remembers previously uncacheable assets.
/// Allows bypassing cache / cache lock early based on historical precedent.
///
/// NOTE: to simply avoid caching requests with certain characteristics,
/// add checks in request_cache_filter to avoid enabling cache in the first place.
/// The predictor's bypass mechanism handles cases where the request _looks_ cacheable
/// but its previous responses suggest otherwise. The request _could_ be cacheable in the future.
pub struct Predictor<const N_SHARDS: usize> {
    uncacheable_keys: ConcurrentLruCache<(), N_SHARDS>,
    skip_custom_reasons_fn: Option<CustomReasonPredicate>,
}

use crate::{key::CacheHashKey, CacheKey, NoCacheReason};
use log::debug;

/// The cache predictor trait.
///
/// This trait allows user defined predictor to replace [Predictor].
pub trait CacheablePredictor {
    /// Return true if likely cacheable, false if likely not.
    fn cacheable_prediction(&self, key: &CacheKey) -> bool;

    /// Mark cacheable to allow next request to cache.
    /// Returns false if the key was already marked cacheable.
    fn mark_cacheable(&self, key: &CacheKey) -> bool;

    /// Mark uncacheable to actively bypass cache on the next request.
    /// May skip marking on certain NoCacheReasons.
    /// Returns None if we skipped marking uncacheable.
    /// Returns Some(false) if the key was already marked uncacheable.
    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool>;
}

// This particular bit of `where [LruShard...; N]: Default` nonsense arises from
// ConcurrentLruCache needing this trait bound, which in turns arises from the Rust
// compiler not being able to guarantee that all array sizes N implement `Default`.
// See https://github.com/rust-lang/rust/issues/61415
impl<const N_SHARDS: usize> Predictor<N_SHARDS>
where
    [LruShard<()>; N_SHARDS]: Default,
{
    /// Create a new Predictor with `N_SHARDS * shard_capacity` total capacity for
    /// uncacheable cache keys.
    ///
    /// - `shard_capacity`: defines number of keys remembered as uncacheable per LRU shard.
    /// - `skip_custom_reasons_fn`: an optional predicate used in `mark_uncacheable`
    ///   that can customize which `Custom` `NoCacheReason`s ought to be remembered as uncacheable.
    ///   If the predicate returns true, then the predictor will skip remembering the current
    ///   cache key as uncacheable (and avoid bypassing cache on the next request).
    pub fn new(
        shard_capacity: usize,
        skip_custom_reasons_fn: Option<CustomReasonPredicate>,
    ) -> Predictor<N_SHARDS> {
        Predictor {
            uncacheable_keys: ConcurrentLruCache::<(), N_SHARDS>::new(shard_capacity),
            skip_custom_reasons_fn,
        }
    }
}

impl<const N_SHARDS: usize> CacheablePredictor for Predictor<N_SHARDS>
where
    [LruShard<()>; N_SHARDS]: Default,
{
    fn cacheable_prediction(&self, key: &CacheKey) -> bool {
        // variance key is ignored because this check happens before cache lookup
        let hash = key.primary_bin();
        let key = u128::from_be_bytes(hash); // Endianness doesn't matter

        // Note: LRU updated in mark_* functions only,
        // as we assume the caller always updates the cacheability of the response later
        !self.uncacheable_keys.read(key).contains(&key)
    }

    fn mark_cacheable(&self, key: &CacheKey) -> bool {
        // variance key is ignored because cacheable_prediction() is called before cache lookup
        // where the variance key is unknown
        let hash = key.primary_bin();
        let key = u128::from_be_bytes(hash);

        let cache = self.uncacheable_keys.get(key);
        if !cache.read().contains(&key) {
            // not in uncacheable list, nothing to do
            return true;
        }

        let mut cache = cache.write();
        cache.pop(&key);
        debug!("bypassed request became cacheable");
        false
    }

    fn mark_uncacheable(&self, key: &CacheKey, reason: NoCacheReason) -> Option<bool> {
        // only mark as uncacheable for the future on certain reasons,
        // (e.g. InternalErrors)
        use NoCacheReason::*;
        match reason {
            // CacheLockGiveUp: the writer will set OriginNotCache (if applicable)
            // readers don't need to do it
            NeverEnabled
            | StorageError
            | InternalError
            | Deferred
            | CacheLockGiveUp
            | CacheLockTimeout
            | DeclinedToUpstream
            | UpstreamError
            | PredictedResponseTooLarge => {
                return None;
            }
            // Skip certain NoCacheReason::Custom according to user
            Custom(reason) if self.skip_custom_reasons_fn.is_some_and(|f| f(reason)) => {
                return None;
            }
            Custom(_) | OriginNotCache | ResponseTooLarge => { /* mark uncacheable for these only */
            }
        }

        // variance key is ignored because cacheable_prediction() is called before cache lookup
        // where the variance key is unknown
        let hash = key.primary_bin();
        let key = u128::from_be_bytes(hash);

        let mut cache = self.uncacheable_keys.get(key).write();
        // put() returns Some(old_value) if the key existed, else None
        let new_key = cache.put(key, ()).is_none();
        if new_key {
            debug!("request marked uncacheable");
        }
        Some(new_key)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_mark_cacheability() {
        let predictor = Predictor::<1>::new(10, None);
        let key = CacheKey::new("a", "b", "c");
        // cacheable if no history
        assert!(predictor.cacheable_prediction(&key));

        // don't remember internal / storage errors
        predictor.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(predictor.cacheable_prediction(&key));
        predictor.mark_uncacheable(&key, NoCacheReason::StorageError);
        assert!(predictor.cacheable_prediction(&key));

        // origin explicitly said uncacheable
        predictor.mark_uncacheable(&key, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key));

        // mark cacheable again
        predictor.mark_cacheable(&key);
        assert!(predictor.cacheable_prediction(&key));
    }

    #[test]
    fn test_custom_skip_predicate() {
        let predictor = Predictor::<1>::new(
            10,
            Some(|custom_reason| matches!(custom_reason, "Skipping")),
        );
        let key = CacheKey::new("a", "b", "c");
        // cacheable if no history
        assert!(predictor.cacheable_prediction(&key));

        // custom predicate still uses default skip reasons
        predictor.mark_uncacheable(&key, NoCacheReason::InternalError);
        assert!(predictor.cacheable_prediction(&key));

        // other custom reasons can still be marked uncacheable
        predictor.mark_uncacheable(&key, NoCacheReason::Custom("DontCacheMe"));
        assert!(!predictor.cacheable_prediction(&key));

        let key = CacheKey::new("a", "c", "d");
        assert!(predictor.cacheable_prediction(&key));
        // specific custom reason is skipped
        predictor.mark_uncacheable(&key, NoCacheReason::Custom("Skipping"));
        assert!(predictor.cacheable_prediction(&key));
    }

    #[test]
    fn test_mark_uncacheable_lru() {
        let predictor = Predictor::<1>::new(3, None);
        let key1 = CacheKey::new("a", "b", "c");
        predictor.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key1));

        let key2 = CacheKey::new("a", "bc", "c");
        predictor.mark_uncacheable(&key2, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key2));

        let key3 = CacheKey::new("a", "cd", "c");
        predictor.mark_uncacheable(&key3, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key3));

        // promote / reinsert key1
        predictor.mark_uncacheable(&key1, NoCacheReason::OriginNotCache);

        let key4 = CacheKey::new("a", "de", "c");
        predictor.mark_uncacheable(&key4, NoCacheReason::OriginNotCache);
        assert!(!predictor.cacheable_prediction(&key4));

        // key 1 was recently used
        assert!(!predictor.cacheable_prediction(&key1));
        // key 2 was evicted
        assert!(predictor.cacheable_prediction(&key2));
        assert!(!predictor.cacheable_prediction(&key3));
        assert!(!predictor.cacheable_prediction(&key4));
    }
}


================================================
FILE: pingora-cache/src/put.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Cache Put module

use crate::max_file_size::ERR_RESPONSE_TOO_LARGE;
use crate::*;
use bytes::Bytes;
use http::header;
use log::warn;
use pingora_core::protocols::http::{
    v1::common::header_value_content_length, HttpTask, ServerSession,
};
use pingora_error::Error;

/// The interface to define cache put behavior
pub trait CachePut {
    /// Return whether to cache the asset according to the given response header.
    fn cacheable(&self, response: ResponseHeader) -> RespCacheable {
        let cc = cache_control::CacheControl::from_resp_headers(&response);
        filters::resp_cacheable(cc.as_ref(), response, false, Self::cache_defaults())
    }

    /// Return the [CacheMetaDefaults]
    fn cache_defaults() -> &'static CacheMetaDefaults;

    /// Put interesting things in the span given the parsed response header.
    fn trace_header(&mut self, _response: &ResponseHeader) {}
}

use parse_response::ResponseParse;

/// The cache put context
pub struct CachePutCtx<C: CachePut> {
    cache_put: C, // the user defined cache put behavior
    key: CacheKey,
    storage: &'static (dyn storage::Storage + Sync), // static for now
    eviction: Option<&'static (dyn eviction::EvictionManager + Sync)>,
    miss_handler: Option<MissHandler>,
    max_file_size_tracker: Option<MaxFileSizeTracker>,
    meta: Option<CacheMeta>,
    parser: ResponseParse,
    // FIXME: cache put doesn't have cache lock but some storage cannot handle concurrent put
    // to the same asset.
    trace: trace::Span,
}

impl<C: CachePut> CachePutCtx<C> {
    /// Create a new [CachePutCtx]
    pub fn new(
        cache_put: C,
        key: CacheKey,
        storage: &'static (dyn storage::Storage + Sync),
        eviction: Option<&'static (dyn eviction::EvictionManager + Sync)>,
        trace: trace::Span,
    ) -> Self {
        CachePutCtx {
            cache_put,
            key,
            storage,
            eviction,
            miss_handler: None,
            max_file_size_tracker: None,
            meta: None,
            parser: ResponseParse::new(),
            trace,
        }
    }

    /// Set the max cacheable size limit
    pub fn set_max_file_size_bytes(&mut self, max_file_size_bytes: usize) {
        self.max_file_size_tracker = Some(MaxFileSizeTracker::new(max_file_size_bytes));
    }

    async fn put_header(&mut self, meta: CacheMeta) -> Result<()> {
        let mut trace = self.trace.child("cache put header", |o| o.start());
        let miss_handler = self
            .storage
            .get_miss_handler(&self.key, &meta, &trace.handle())
            .await?;
        trace::tag_span_with_meta(&mut trace, &meta);
        self.miss_handler = Some(miss_handler);
        self.meta = Some(meta);
        Ok(())
    }

    async fn put_body(&mut self, data: Bytes, eof: bool) -> Result<()> {
        // fail if writing the body would exceed the max_file_size_bytes
        if let Some(size_tracker) = self.max_file_size_tracker.as_mut() {
            let body_size_allowed = size_tracker.add_body_bytes(data.len());
            if !body_size_allowed {
                return Error::e_explain(
                    ERR_RESPONSE_TOO_LARGE,
                    format!(
                        "writing data of size {} bytes would exceed max file size of {} bytes",
                        data.len(),
                        size_tracker.max_file_size_bytes(),
                    ),
                );
            }
        }

        let miss_handler = self.miss_handler.as_mut().unwrap();
        miss_handler.write_body(data, eof).await
    }

    async fn finish(&mut self) -> Result<()> {
        let Some(miss_handler) = self.miss_handler.take() else {
            // no miss_handler, uncacheable
            return Ok(());
        };
        let finish = miss_handler.finish().await?;
        if let Some(eviction) = self.eviction.as_ref() {
            let cache_key = self.key.to_compact();
            let meta = self.meta.as_ref().unwrap();
            let evicted = match finish {
                MissFinishType::Appended(delta, max_size) => {
                    eviction.increment_weight(&cache_key, delta, max_size)
                }
                MissFinishType::Created(size) => {
                    eviction.admit(cache_key, size, meta.0.internal.fresh_until)
                }
            };
            // actual eviction can be done async
            let trace = self
                .trace
                .child("cache put eviction", |o| o.start())
                .handle();
            let storage = self.storage;
            tokio::task::spawn(async move {
                for item in evicted {
                    if let Err(e) = storage.purge(&item, PurgeType::Eviction, &trace).await {
                        warn!("Failed to purge {item} during eviction for cache put: {e}");
                    }
                }
            });
        }

        Ok(())
    }

    fn trace_header(&mut self, header: &ResponseHeader) {
        self.trace.set_tag(|| {
            Tag::new(
                "cache-control",
                header
                    .headers
                    .get_all(http::header::CACHE_CONTROL)
                    .into_iter()
                    .map(|v| String::from_utf8_lossy(v.as_bytes()).to_string())
                    .collect::<Vec<_>>()
                    .join(","),
            )
        });
    }

    async fn do_cache_put(&mut self, data: &[u8]) -> Result<Option<NoCacheReason>> {
        let tasks = self.parser.inject_data(data)?;
        for task in tasks {
            match task {
                HttpTask::Header(header, _eos) => {
                    self.trace_header(&header);
                    match self.cache_put.cacheable(*header) {
                        RespCacheable::Cacheable(meta) => {
                            if let Some(max_file_size_tracker) = &self.max_file_size_tracker {
                                let content_length_hdr = meta.headers().get(header::CONTENT_LENGTH);
                                if let Some(content_length) =
                                    header_value_content_length(content_length_hdr)
                                {
                                    if content_length > max_file_size_tracker.max_file_size_bytes()
                                    {
                                        return Ok(Some(NoCacheReason::ResponseTooLarge));
                                    }
                                }
                            }

                            self.put_header(meta).await?;
                        }
                        RespCacheable::Uncacheable(reason) => {
                            return Ok(Some(reason));
                        }
                    }
                }
                HttpTask::Body(data, eos) => {
                    if let Some(data) = data {
                        self.put_body(data, eos).await?;
                    }
                }
                _ => {
                    panic!("unexpected HttpTask during cache put {task:?}");
                }
            }
        }
        Ok(None)
    }

    /// Start the cache put logic for the given request
    ///
    /// This function will start to read the request body to put into cache.
    /// Return:
    /// - `Ok(None)` when the payload will be cache.
    /// - `Ok(Some(reason))` when the payload is not cacheable
    pub async fn cache_put(
        &mut self,
        session: &mut ServerSession,
    ) -> Result<Option<NoCacheReason>> {
        let mut no_cache_reason = None;
        while let Some(data) = session.read_request_body().await? {
            if no_cache_reason.is_some() {
                // even uncacheable, the entire body needs to be drains for 1. downstream
                // not throwing errors 2. connection reuse
                continue;
            }
            no_cache_reason = self.do_cache_put(&data).await?
        }
        self.parser.finish()?;
        self.finish().await?;

        if let Some(reason) = no_cache_reason {
            self.trace
                .set_tag(|| Tag::new("uncacheable_reason", reason.as_str()));
        }

        Ok(no_cache_reason)
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use cf_rustracing::span::Span;
    use once_cell::sync::Lazy;

    struct TestCachePut();
    impl CachePut for TestCachePut {
        fn cache_defaults() -> &'static CacheMetaDefaults {
            const DEFAULT: CacheMetaDefaults =
                CacheMetaDefaults::new(|_| Some(Duration::from_secs(1)), 1, 1);
            &DEFAULT
        }
    }

    type TestCachePutCtx = CachePutCtx<TestCachePut>;
    static CACHE_BACKEND: Lazy<MemCache> = Lazy::new(MemCache::new);

    #[tokio::test]
    async fn test_cache_put() {
        let key = CacheKey::new("", "a", "1");
        let span = Span::inactive();
        let put = TestCachePut();
        let mut ctx = TestCachePutCtx::new(put, key.clone(), &*CACHE_BACKEND, None, span);
        let payload = b"HTTP/1.1 200 OK\r\n\
        Date: Thu, 26 Apr 2018 05:42:05 GMT\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        Connection: keep-alive\r\n\
        X-Frame-Options: SAMEORIGIN\r\n\
        Cache-Control: public, max-age=1\r\n\
        Server: origin-server\r\n\
        Content-Length: 4\r\n\r\nrust";
        // here we skip mocking a real http session for simplicity
        let res = ctx.do_cache_put(payload).await.unwrap();
        assert!(res.is_none()); // cacheable
        ctx.parser.finish().unwrap();
        ctx.finish().await.unwrap();

        let span = Span::inactive();
        let (meta, mut hit) = CACHE_BACKEND
            .lookup(&key, &span.handle())
            .await
            .unwrap()
            .unwrap();
        assert_eq!(
            meta.headers().get("date").unwrap(),
            "Thu, 26 Apr 2018 05:42:05 GMT"
        );
        let data = hit.read_body().await.unwrap().unwrap();
        assert_eq!(data, "rust");
    }

    #[tokio::test]
    async fn test_cache_put_uncacheable() {
        let key = CacheKey::new("", "a", "1");
        let span = Span::inactive();
        let put = TestCachePut();
        let mut ctx = TestCachePutCtx::new(put, key.clone(), &*CACHE_BACKEND, None, span);
        let payload = b"HTTP/1.1 200 OK\r\n\
        Date: Thu, 26 Apr 2018 05:42:05 GMT\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        Connection: keep-alive\r\n\
        X-Frame-Options: SAMEORIGIN\r\n\
        Cache-Control: no-store\r\n\
        Server: origin-server\r\n\
        Content-Length: 4\r\n\r\nrust";
        // here we skip mocking a real http session for simplicity
        let no_cache = ctx.do_cache_put(payload).await.unwrap().unwrap();
        assert_eq!(no_cache, NoCacheReason::OriginNotCache);
        ctx.parser.finish().unwrap();
        ctx.finish().await.unwrap();
    }

    #[tokio::test]
    async fn test_cache_put_204_invalid_body() {
        let key = CacheKey::new("", "b", "1");
        let span = Span::inactive();
        let put = TestCachePut();
        let mut ctx = TestCachePutCtx::new(put, key.clone(), &*CACHE_BACKEND, None, span);
        let payload = b"HTTP/1.1 204 OK\r\n\
        Date: Thu, 26 Apr 2018 05:42:05 GMT\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        Connection: keep-alive\r\n\
        X-Frame-Options: SAMEORIGIN\r\n\
        Cache-Control: public, max-age=1\r\n\
        Server: origin-server\r\n\
        Content-Length: 4\r\n\r\n";
        // here we skip mocking a real http session for simplicity
        let res = ctx.do_cache_put(payload).await.unwrap();
        assert!(res.is_none()); // cacheable
                                // 204 should not have body, invalid client input may try to pass one
        let res = ctx.do_cache_put(b"rust").await.unwrap();
        assert!(res.is_none()); // still cacheable
        ctx.parser.finish().unwrap();
        ctx.finish().await.unwrap();

        let span = Span::inactive();
        let (meta, mut hit) = CACHE_BACKEND
            .lookup(&key, &span.handle())
            .await
            .unwrap()
            .unwrap();
        assert_eq!(
            meta.headers().get("date").unwrap(),
            "Thu, 26 Apr 2018 05:42:05 GMT"
        );
        // just treated as empty body
        // (TODO: should we reset content-length/transfer-encoding
        // headers on 204/304?)
        let data = hit.read_body().await.unwrap().unwrap();
        assert!(data.is_empty());
    }

    #[tokio::test]
    async fn test_cache_put_extra_body() {
        let key = CacheKey::new("", "c", "1");
        let span = Span::inactive();
        let put = TestCachePut();
        let mut ctx = TestCachePutCtx::new(put, key.clone(), &*CACHE_BACKEND, None, span);
        let payload = b"HTTP/1.1 200 OK\r\n\
        Date: Thu, 26 Apr 2018 05:42:05 GMT\r\n\
        Content-Type: text/html; charset=utf-8\r\n\
        Connection: keep-alive\r\n\
        X-Frame-Options: SAMEORIGIN\r\n\
        Cache-Control: public, max-age=1\r\n\
        Server: origin-server\r\n\
        Content-Length: 4\r\n\r\n";
        // here we skip mocking a real http session for simplicity
        let res = ctx.do_cache_put(payload).await.unwrap();
        assert!(res.is_none()); // cacheable
                                // pass in more extra request body that needs to be drained
        let res = ctx.do_cache_put(b"rustab").await.unwrap();
        assert!(res.is_none()); // still cacheable
        let res = ctx.do_cache_put(b"cdef").await.unwrap();
        assert!(res.is_none()); // still cacheable
        ctx.parser.finish().unwrap();
        ctx.finish().await.unwrap();

        let span = Span::inactive();
        let (meta, mut hit) = CACHE_BACKEND
            .lookup(&key, &span.handle())
            .await
            .unwrap()
            .unwrap();
        assert_eq!(
            meta.headers().get("date").unwrap(),
            "Thu, 26 Apr 2018 05:42:05 GMT"
        );
        let data = hit.read_body().await.unwrap().unwrap();
        // body only contains specified content-length bounds
        assert_eq!(data, "rust");
    }
}

// maybe this can simplify some logic in pingora::h1

mod parse_response {
    use super::*;
    use bstr::ByteSlice;
    use bytes::BytesMut;
    use httparse::Status;
    use pingora_error::{
        Error,
        ErrorType::{self, *},
    };

    pub const INCOMPLETE_BODY: ErrorType = ErrorType::new("IncompleteHttpBody");

    const MAX_HEADERS: usize = 256;
    const INIT_HEADER_BUF_SIZE: usize = 4096;

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum ParseState {
        Init,
        PartialHeader,
        PartialBodyContentLength(usize, usize),
        PartialBody(usize),
        Done(usize),
        Invalid(httparse::Error),
    }

    impl ParseState {
        fn is_done(&self) -> bool {
            matches!(self, Self::Done(_))
        }
        fn read_header(&self) -> bool {
            matches!(self, Self::Init | Self::PartialHeader)
        }
        fn read_body(&self) -> bool {
            matches!(
                self,
                Self::PartialBodyContentLength(..) | Self::PartialBody(_)
            )
        }
    }

    pub(super) struct ResponseParse {
        state: ParseState,
        buf: BytesMut,
        header_bytes: Bytes,
    }

    impl ResponseParse {
        pub fn new() -> Self {
            ResponseParse {
                state: ParseState::Init,
                buf: BytesMut::with_capacity(INIT_HEADER_BUF_SIZE),
                header_bytes: Bytes::new(),
            }
        }

        pub fn inject_data(&mut self, data: &[u8]) -> Result<Vec<HttpTask>> {
            if self.state.is_done() {
                // just ignore extra response body after parser is done
                // could be invalid body appended to a no-content status
                // or invalid body after content-length
                // TODO: consider propagating an error to the client
                return Ok(vec![]);
            }

            self.put_data(data);

            let mut tasks = vec![];
            while !self.state.is_done() {
                if self.state.read_header() {
                    let header = self.parse_header()?;
                    let Some(header) = header else {
                        break;
                    };
                    tasks.push(HttpTask::Header(Box::new(header), self.state.is_done()));
                } else if self.state.read_body() {
                    let body = self.parse_body()?;
                    let Some(body) = body else {
                        break;
                    };
                    tasks.push(HttpTask::Body(Some(body), self.state.is_done()));
                } else {
                    break;
                }
            }
            Ok(tasks)
        }

        fn put_data(&mut self, data: &[u8]) {
            use ParseState::*;
            if matches!(self.state, Done(_) | Invalid(_)) {
                panic!("Wrong phase {:?}", self.state);
            }
            self.buf.extend_from_slice(data);
        }

        fn parse_header(&mut self) -> Result<Option<ResponseHeader>> {
            let mut headers = [httparse::EMPTY_HEADER; MAX_HEADERS];
            let mut resp = httparse::Response::new(&mut headers);
            let mut parser = httparse::ParserConfig::default();
            parser.allow_spaces_after_header_name_in_responses(true);
            parser.allow_obsolete_multiline_headers_in_responses(true);

            let res = parser.parse_response(&mut resp, &self.buf);
            let res = match res {
                Ok(res) => res,
                Err(e) => {
                    self.state = ParseState::Invalid(e);
                    return Error::e_because(
                        InvalidHTTPHeader,
                        format!("buf: {:?}", self.buf.as_bstr()),
                        e,
                    );
                }
            };

            let split_to = match res {
                Status::Complete(s) => s,
                Status::Partial => {
                    self.state = ParseState::PartialHeader;
                    return Ok(None);
                }
            };
            // safe to unwrap, valid response always has code set.
            let mut response =
                ResponseHeader::build(resp.code.unwrap(), Some(resp.headers.len())).unwrap();
            for header in resp.headers {
                // TODO: consider hold a Bytes and all header values can be Bytes referencing the
                // original buffer without reallocation
                response.append_header(header.name.to_owned(), header.value.to_owned())?;
            }
            // TODO: see above, we can make header value `Bytes` referencing header_bytes
            let header_bytes = self.buf.split_to(split_to).freeze();
            self.header_bytes = header_bytes;
            self.state = body_type(&response);

            Ok(Some(response))
        }

        fn parse_body(&mut self) -> Result<Option<Bytes>> {
            use ParseState::*;
            if self.buf.is_empty() {
                return Ok(None);
            }
            match self.state {
                Init | PartialHeader | Invalid(_) => {
                    panic!("Wrong phase {:?}", self.state);
                }
                Done(_) => Ok(None),
                PartialBodyContentLength(total, mut seen) => {
                    let end = if total < self.buf.len() + seen {
                        // TODO: warn! more data than expected
                        total - seen
                    } else {
                        self.buf.len()
                    };
                    seen += end;
                    if seen >= total {
                        self.state = Done(seen);
                    } else {
                        self.state = PartialBodyContentLength(total, seen);
                    }
                    Ok(Some(self.buf.split_to(end).freeze()))
                }
                PartialBody(seen) => {
                    self.state = PartialBody(seen + self.buf.len());
                    Ok(Some(self.buf.split().freeze()))
                }
            }
        }

        pub fn finish(&mut self) -> Result<()> {
            if let ParseState::PartialBody(seen) = self.state {
                self.state = ParseState::Done(seen);
            }
            if !self.state.is_done() {
                Error::e_explain(INCOMPLETE_BODY, format!("{:?}", self.state))
            } else {
                Ok(())
            }
        }
    }

    fn body_type(resp: &ResponseHeader) -> ParseState {
        use http::StatusCode;

        if matches!(
            resp.status,
            StatusCode::NO_CONTENT | StatusCode::NOT_MODIFIED
        ) {
            // these status codes cannot have body by definition
            return ParseState::Done(0);
        }
        if let Some(cl) = resp.headers.get(http::header::CONTENT_LENGTH) {
            // ignore invalid header value
            if let Some(cl) = std::str::from_utf8(cl.as_bytes())
                .ok()
                .and_then(|cl| cl.parse::<usize>().ok())
            {
                return if cl == 0 {
                    ParseState::Done(0)
                } else {
                    ParseState::PartialBodyContentLength(cl, 0)
                };
            }
        }
        // HTTP/1.0 and chunked encoding are both treated as PartialBody
        // The response body payload should _not_ be chunked encoded
        // even if the Transfer-Encoding: chunked header is added
        ParseState::PartialBody(0)
    }

    #[cfg(test)]
    mod test {
        use super::*;

        #[test]
        fn test_basic_response() {
            let input = b"HTTP/1.1 200 OK\r\n\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();
            assert_eq!(output.len(), 1);
            let HttpTask::Header(header, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);
            assert!(!eos);

            let body = b"abc";
            let output = parser.inject_data(body).unwrap();
            assert_eq!(output.len(), 1);
            let HttpTask::Body(data, _eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), &body[..]);
            parser.finish().unwrap();
        }

        #[test]
        fn test_partial_response_headers() {
            let input = b"HTTP/1.1 200 OK\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();
            // header is not complete
            assert_eq!(output.len(), 0);

            let output = parser
                .inject_data("Server: pingora\r\n\r\n".as_bytes())
                .unwrap();
            assert_eq!(output.len(), 1);
            let HttpTask::Header(header, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);
            assert_eq!(header.headers.get("Server").unwrap(), "pingora");
            assert!(!eos);
        }

        #[test]
        fn test_invalid_headers() {
            let input = b"HTP/1.1 200 OK\r\nServer: pingora\r\n\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input);
            // header is not complete
            assert!(output.is_err());
            match parser.state {
                ParseState::Invalid(httparse::Error::Version) => {}
                _ => panic!("should have failed to parse"),
            }
        }

        #[test]
        fn test_body_content_length() {
            let input = b"HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nabc";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 2);
            let HttpTask::Header(header, _eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);

            let HttpTask::Body(data, eos) = &output[1] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), "abc");
            assert!(!eos);

            let output = parser.inject_data(b"def").unwrap();
            assert_eq!(output.len(), 1);
            let HttpTask::Body(data, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), "def");
            assert!(eos);

            parser.finish().unwrap();
        }

        #[test]
        fn test_body_chunked() {
            let input = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\nrust";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 2);
            let HttpTask::Header(header, _eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);

            let HttpTask::Body(data, eos) = &output[1] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), "rust");
            assert!(!eos);

            parser.finish().unwrap();
        }

        #[test]
        fn test_body_content_length_early() {
            let input = b"HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\nabc";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 2);
            let HttpTask::Header(header, _eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);

            let HttpTask::Body(data, eos) = &output[1] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), "abc");
            assert!(!eos);

            parser.finish().unwrap_err();
        }

        #[test]
        fn test_body_content_length_more_data() {
            let input = b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nabc";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 2);
            let HttpTask::Header(header, _eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);

            let HttpTask::Body(data, eos) = &output[1] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), "ab");
            assert!(eos);

            // extra data is dropped without error
            parser.finish().unwrap();
        }

        #[test]
        fn test_body_chunked_partial_chunk() {
            let input = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\nru";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 2);
            let HttpTask::Header(header, _eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);

            let HttpTask::Body(data, eos) = &output[1] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), "ru");
            assert!(!eos);

            let output = parser.inject_data(b"st\r\n").unwrap();
            assert_eq!(output.len(), 1);
            let HttpTask::Body(data, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(data.as_ref().unwrap(), "st\r\n");
            assert!(!eos);
        }

        #[test]
        fn test_no_body_content_length() {
            let input = b"HTTP/1.1 200 OK\r\nContent-Length: 0\r\n\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 1);
            let HttpTask::Header(header, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);
            assert!(eos);

            parser.finish().unwrap();
        }

        #[test]
        fn test_no_body_304_no_content_length() {
            let input = b"HTTP/1.1 304 Not Modified\r\nCache-Control: public, max-age=10\r\n\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 1);
            let HttpTask::Header(header, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 304);
            assert!(eos);

            parser.finish().unwrap();
        }

        #[test]
        fn test_204_with_chunked_body() {
            let input = b"HTTP/1.1 204 No Content\r\nCache-Control: public, max-age=10\r\nTransfer-Encoding: chunked\r\n\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 1);
            let HttpTask::Header(header, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 204);
            assert!(eos);

            // 204 should not have a body, parser ignores bad input
            let output = parser.inject_data(b"4\r\nrust\r\n0\r\n\r\n").unwrap();
            assert!(output.is_empty());
            parser.finish().unwrap();
        }

        #[test]
        fn test_204_with_content_length() {
            let input = b"HTTP/1.1 204 No Content\r\nCache-Control: public, max-age=10\r\nContent-Length: 4\r\n\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 1);
            let HttpTask::Header(header, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 204);
            assert!(eos);

            // 204 should not have a body, parser ignores bad input
            let output = parser.inject_data(b"rust").unwrap();
            assert!(output.is_empty());
            parser.finish().unwrap();
        }

        #[test]
        fn test_200_with_zero_content_length_more_data() {
            let input = b"HTTP/1.1 200 OK\r\nCache-Control: public, max-age=10\r\nContent-Length: 0\r\n\r\n";
            let mut parser = ResponseParse::new();
            let output = parser.inject_data(input).unwrap();

            assert_eq!(output.len(), 1);
            let HttpTask::Header(header, eos) = &output[0] else {
                panic!("{:?}", output);
            };
            assert_eq!(header.status, 200);
            assert!(eos);

            let output = parser.inject_data(b"rust").unwrap();
            assert!(output.is_empty());
            parser.finish().unwrap();
        }
    }
}


================================================
FILE: pingora-cache/src/storage.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Cache backend storage abstraction

use super::{CacheKey, CacheMeta};
use crate::key::CompactCacheKey;
use crate::trace::SpanHandle;

use async_trait::async_trait;
use pingora_error::Result;
use std::any::Any;

/// The reason a purge() is called
#[derive(Debug, Clone, Copy)]
pub enum PurgeType {
    // For eviction because the cache storage is full
    Eviction,
    // For cache invalidation
    Invalidation,
}

/// Cache storage interface
#[async_trait]
pub trait Storage {
    // TODO: shouldn't have to be static

    /// Lookup the storage for the given [CacheKey].
    async fn lookup(
        &'static self,
        key: &CacheKey,
        trace: &SpanHandle,
    ) -> Result<Option<(CacheMeta, HitHandler)>>;

    /// Lookup the storage for the given [CacheKey] using a streaming write tag.
    ///
    /// When streaming partial writes is supported, the request that initiates the write will also
    /// pass an optional `streaming_write_tag` so that the storage may try to find the associated
    /// [HitHandler], for the same ongoing write.
    ///
    /// Therefore, when the write tag is set, the storage implementation should either return a
    /// [HitHandler] that can be matched to that tag, or none at all. Otherwise when the storage
    /// supports concurrent streaming writes for the same key, the calling request may receive a
    /// different body from the one it expected.
    ///
    /// By default this defers to the standard `Storage::lookup` implementation.
    async fn lookup_streaming_write(
        &'static self,
        key: &CacheKey,
        _streaming_write_tag: Option<&[u8]>,
        trace: &SpanHandle,
    ) -> Result<Option<(CacheMeta, HitHandler)>> {
        self.lookup(key, trace).await
    }

    /// Write the given [CacheMeta] to the storage. Return [MissHandler] to write the body later.
    async fn get_miss_handler(
        &'static self,
        key: &CacheKey,
        meta: &CacheMeta,
        trace: &SpanHandle,
    ) -> Result<MissHandler>;

    /// Delete the cached asset for the given key
    ///
    /// [CompactCacheKey] is used here because it is how eviction managers store the keys
    async fn purge(
        &'static self,
        key: &CompactCacheKey,
        purge_type: PurgeType,
        trace: &SpanHandle,
    ) -> Result<bool>;

    /// Update cache header and metadata for the already stored asset.
    async fn update_meta(
        &'static self,
        key: &CacheKey,
        meta: &CacheMeta,
        trace: &SpanHandle,
    ) -> Result<bool>;

    /// Whether this storage backend supports reading partially written data
    ///
    /// This is to indicate when cache should unlock readers
    fn support_streaming_partial_write(&self) -> bool {
        false
    }

    /// Helper function to cast the trait object to concrete types
    fn as_any(&self) -> &(dyn Any + Send + Sync + 'static);
}

/// Cache hit handling trait
#[async_trait]
pub trait HandleHit {
    /// Read cached body
    ///
    /// Return `None` when no more body to read.
    async fn read_body(&mut self) -> Result<Option<bytes::Bytes>>;

    /// Finish the current cache hit
    async fn finish(
        self: Box<Self>, // because self is always used as a trait object
        storage: &'static (dyn Storage + Sync),
        key: &CacheKey,
        trace: &SpanHandle,
    ) -> Result<()>;

    /// Whether this storage allows seeking to a certain range of body for single ranges.
    fn can_seek(&self) -> bool {
        false
    }

    /// Whether this storage allows seeking to a certain range of body for multipart ranges.
    ///
    /// By default uses the `can_seek` implementation.
    fn can_seek_multipart(&self) -> bool {
        self.can_seek()
    }

    /// Try to seek to a certain range of the body for single ranges.
    ///
    /// `end: None` means to read to the end of the body.
    fn seek(&mut self, _start: usize, _end: Option<usize>) -> Result<()> {
        // to prevent impl can_seek() without impl seek
        todo!("seek() needs to be implemented")
    }

    /// Try to seek to a certain range of the body for multipart ranges.
    ///
    /// Works in an identical manner to `seek()`.
    ///
    /// `end: None` means to read to the end of the body.
    ///
    /// By default uses the `seek` implementation, but hit handlers may customize the
    /// implementation specifically to anticipate multipart requests.
    fn seek_multipart(&mut self, start: usize, end: Option<usize>) -> Result<()> {
        // to prevent impl can_seek() without impl seek
        self.seek(start, end)
    }

    // TODO: fn is_stream_hit()

    /// Should we count this hit handler instance as an access in the eviction manager.
    ///
    /// Defaults to returning true to track all cache hits as accesses. Customize this if certain
    /// hits should not affect the eviction system's view of the asset.
    fn should_count_access(&self) -> bool {
        true
    }

    /// Returns the weight of the current cache hit asset to report to the eviction manager.
    ///
    /// This allows the eviction system to initialize a weight for the asset, in case it is not
    /// already tracking it (e.g. storage is out of sync with the eviction manager).
    ///
    /// Defaults to 0.
    fn get_eviction_weight(&self) -> usize {
        0
    }

    /// Helper function to cast the trait object to concrete types
    fn as_any(&self) -> &(dyn Any + Send + Sync);

    /// Helper function to cast the trait object to concrete types
    fn as_any_mut(&mut self) -> &mut (dyn Any + Send + Sync);
}

/// Hit Handler
pub type HitHandler = Box<dyn HandleHit + Sync + Send>;

/// MissFinishType
pub enum MissFinishType {
    /// A new asset was created with the given size.
    Created(usize),
    /// Appended size to existing asset, with an optional max size param.
    Appended(usize, Option<usize>),
}

/// Cache miss handling trait
#[async_trait]
pub trait HandleMiss {
    /// Write the given body to the storage
    async fn write_body(&mut self, data: bytes::Bytes, eof: bool) -> Result<()>;

    /// Finish the cache admission
    ///
    /// When `self` is dropped without calling this function, the storage should consider this write
    /// failed.
    async fn finish(
        self: Box<Self>, // because self is always used as a trait object
    ) -> Result<MissFinishType>;

    /// Return a streaming write tag recognized by the underlying [`Storage`].
    ///
    /// This is an arbitrary data identifier that is used to associate this miss handler's current
    /// write with a hit handler for the same write. This identifier will be compared by the
    /// storage during `lookup_streaming_write`.
    // This write tag is essentially an borrowed data blob of bytes retrieved from the miss handler
    // and passed to storage, which means it can support strings or small data types, e.g. bytes
    // represented by a u64.
    // The downside with the current API is that such a data blob must be owned by the miss handler
    // and stored in a way that permits retrieval as a byte slice (not computed on the fly).
    // But most use cases likely only require a simple integer and may not like the overhead of a
    // Vec/String allocation or even a Cow, though such data types can also be used here.
    fn streaming_write_tag(&self) -> Option<&[u8]> {
        None
    }
}

/// Miss Handler
pub type MissHandler = Box<dyn HandleMiss + Sync + Send>;

pub mod streaming_write {
    /// Portable u64 (sized) write id convenience type for use with streaming writes.
    ///
    /// Often an integer value is sufficient for a streaming write tag. This convenience type enables
    /// storing such a value and functions for consistent conversion between byte sequence data types.
    #[derive(Debug, Clone, Copy)]
    pub struct U64WriteId([u8; 8]);

    impl U64WriteId {
        pub fn as_bytes(&self) -> &[u8] {
            &self.0[..]
        }
    }

    impl From<u64> for U64WriteId {
        fn from(value: u64) -> U64WriteId {
            U64WriteId(value.to_be_bytes())
        }
    }
    impl From<U64WriteId> for u64 {
        fn from(value: U64WriteId) -> u64 {
            u64::from_be_bytes(value.0)
        }
    }
    impl TryFrom<&[u8]> for U64WriteId {
        type Error = std::array::TryFromSliceError;

        fn try_from(value: &[u8]) -> std::result::Result<Self, Self::Error> {
            Ok(U64WriteId(value.try_into()?))
        }
    }

    /// Portable u32 (sized) write id convenience type for use with streaming writes.
    ///
    /// Often an integer value is sufficient for a streaming write tag. This convenience type enables
    /// storing such a value and functions for consistent conversion between byte sequence data types.
    #[derive(Debug, Clone, Copy)]
    pub struct U32WriteId([u8; 4]);

    impl U32WriteId {
        pub fn as_bytes(&self) -> &[u8] {
            &self.0[..]
        }
    }

    impl From<u32> for U32WriteId {
        fn from(value: u32) -> U32WriteId {
            U32WriteId(value.to_be_bytes())
        }
    }
    impl From<U32WriteId> for u32 {
        fn from(value: U32WriteId) -> u32 {
            u32::from_be_bytes(value.0)
        }
    }
    impl TryFrom<&[u8]> for U32WriteId {
        type Error = std::array::TryFromSliceError;

        fn try_from(value: &[u8]) -> std::result::Result<Self, Self::Error> {
            Ok(U32WriteId(value.try_into()?))
        }
    }
}


================================================
FILE: pingora-cache/src/trace.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Distributed tracing helpers

use cf_rustracing_jaeger::span::SpanContextState;
use std::time::SystemTime;

use crate::{CacheMeta, CachePhase, HitStatus};

pub use cf_rustracing::tag::Tag;

pub type Span = cf_rustracing::span::Span<SpanContextState>;
pub type SpanHandle = cf_rustracing::span::SpanHandle<SpanContextState>;

#[derive(Debug)]
pub(crate) struct CacheTraceCTX {
    // parent span
    pub cache_span: Span,
    // only spans across multiple calls need to store here
    pub miss_span: Span,
    pub hit_span: Span,
}

pub fn tag_span_with_meta(span: &mut Span, meta: &CacheMeta) {
    fn ts2epoch(ts: SystemTime) -> f64 {
        ts.duration_since(SystemTime::UNIX_EPOCH)
            .unwrap_or_default() // should never overflow but be safe here
            .as_secs_f64()
    }
    let internal = &meta.0.internal;
    span.set_tags(|| {
        [
            Tag::new("created", ts2epoch(internal.created)),
            Tag::new("fresh_until", ts2epoch(internal.fresh_until)),
            Tag::new("updated", ts2epoch(internal.updated)),
            Tag::new("stale_if_error_sec", internal.stale_if_error_sec as i64),
            Tag::new(
                "stale_while_revalidate_sec",
                internal.stale_while_revalidate_sec as i64,
            ),
            Tag::new("variance", internal.variance.is_some()),
        ]
    });
}

impl CacheTraceCTX {
    pub fn new() -> Self {
        CacheTraceCTX {
            cache_span: Span::inactive(),
            miss_span: Span::inactive(),
            hit_span: Span::inactive(),
        }
    }

    pub fn enable(&mut self, cache_span: Span) {
        self.cache_span = cache_span;
    }

    pub fn get_cache_span(&self) -> SpanHandle {
        self.cache_span.handle()
    }

    #[inline]
    pub fn child(&self, name: &'static str) -> Span {
        self.cache_span.child(name, |o| o.start())
    }

    pub fn start_miss_span(&mut self) {
        self.miss_span = self.child("miss");
    }

    pub fn get_miss_span(&self) -> SpanHandle {
        self.miss_span.handle()
    }

    pub fn finish_miss_span(&mut self) {
        self.miss_span.set_finish_time(SystemTime::now);
    }

    pub fn start_hit_span(&mut self, phase: CachePhase, hit_status: HitStatus) {
        self.hit_span = self.child("hit");
        self.hit_span.set_tag(|| Tag::new("phase", phase.as_str()));
        self.hit_span
            .set_tag(|| Tag::new("status", hit_status.as_str()));
    }

    pub fn get_hit_span(&self) -> SpanHandle {
        self.hit_span.handle()
    }

    pub fn finish_hit_span(&mut self) {
        self.hit_span.set_finish_time(SystemTime::now);
    }

    pub fn log_meta_in_hit_span(&mut self, meta: &CacheMeta) {
        tag_span_with_meta(&mut self.hit_span, meta);
    }

    pub fn log_meta_in_miss_span(&mut self, meta: &CacheMeta) {
        tag_span_with_meta(&mut self.miss_span, meta);
    }
}


================================================
FILE: pingora-cache/src/variance.rs
================================================
use std::{borrow::Cow, collections::BTreeMap};

use blake2::Digest;

use crate::key::{Blake2b128, HashBinary};

/// A builder for variance keys, used for distinguishing multiple cached assets
/// at the same URL. This is intended to be easily passed to helper functions,
/// which can each populate a portion of the variance.
pub struct VarianceBuilder<'a> {
    values: BTreeMap<Cow<'a, str>, Cow<'a, [u8]>>,
}

impl<'a> VarianceBuilder<'a> {
    /// Create an empty variance key. Has no variance by default - add some variance using
    /// [`Self::add_value`].
    pub fn new() -> Self {
        VarianceBuilder {
            values: BTreeMap::new(),
        }
    }

    /// Add a byte string to the variance key. Not sensitive to insertion order.
    /// `value` is intended to take either `&str` or `&[u8]`.
    pub fn add_value(&mut self, name: &'a str, value: &'a (impl AsRef<[u8]> + ?Sized)) {
        self.values
            .insert(name.into(), Cow::Borrowed(value.as_ref()));
    }

    /// Move a byte string to the variance key. Not sensitive to insertion order. Useful when
    /// writing helper functions which generate a value then add said value to the VarianceBuilder.
    /// Without this, the helper function would have to move the value to the calling function
    /// to extend its lifetime to at least match the VarianceBuilder.
    pub fn add_owned_value(&mut self, name: &'a str, value: Vec<u8>) {
        self.values.insert(name.into(), Cow::Owned(value));
    }

    /// Check whether this variance key actually has variance, or just refers to the root asset
    pub fn has_variance(&self) -> bool {
        !self.values.is_empty()
    }

    /// Hash this variance key. Returns [`None`] if [`Self::has_variance`] is false.
    pub fn finalize(self) -> Option<HashBinary> {
        const SALT: &[u8; 1] = &[0u8; 1];
        if self.has_variance() {
            let mut hash = Blake2b128::new();
            for (name, value) in self.values.iter() {
                hash.update(name.as_bytes());
                hash.update(SALT);
                hash.update(value);
                hash.update(SALT);
            }
            Some(hash.finalize().into())
        } else {
            None
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_basic() {
        let key_empty = VarianceBuilder::new().finalize();
        assert_eq!(None, key_empty);

        let mut key_value = VarianceBuilder::new();
        key_value.add_value("a", "a");
        let key_value = key_value.finalize();

        let mut key_owned_value = VarianceBuilder::new();
        key_owned_value.add_owned_value("a", "a".as_bytes().to_vec());
        let key_owned_value = key_owned_value.finalize();

        assert_ne!(key_empty, key_value);
        assert_ne!(key_empty, key_owned_value);
        assert_eq!(key_value, key_owned_value);
    }

    #[test]
    fn test_value_ordering() {
        let mut key_abc = VarianceBuilder::new();
        key_abc.add_value("a", "a");
        key_abc.add_value("b", "b");
        key_abc.add_value("c", "c");
        let key_abc = key_abc.finalize().unwrap();

        let mut key_bac = VarianceBuilder::new();
        key_bac.add_value("b", "b");
        key_bac.add_value("a", "a");
        key_bac.add_value("c", "c");
        let key_bac = key_bac.finalize().unwrap();

        let mut key_cba = VarianceBuilder::new();
        key_cba.add_value("c", "c");
        key_cba.add_value("b", "b");
        key_cba.add_value("a", "a");
        let key_cba = key_cba.finalize().unwrap();

        assert_eq!(key_abc, key_bac);
        assert_eq!(key_abc, key_cba);
    }

    #[test]
    fn test_value_overriding() {
        let mut key_a = VarianceBuilder::new();
        key_a.add_value("a", "a");
        let key_a = key_a.finalize().unwrap();

        let mut key_b = VarianceBuilder::new();
        key_b.add_value("a", "b");
        key_b.add_value("a", "a");
        let key_b = key_b.finalize().unwrap();

        assert_eq!(key_a, key_b);
    }
}


================================================
FILE: pingora-core/Cargo.toml
================================================
[package]
name = "pingora-core"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "http", "network", "pingora"]
exclude = ["tests/*"]
description = """
Pingora's APIs and traits for the core network protocols.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
name = "pingora_core"
path = "src/lib.rs"

[dependencies]
pingora-runtime = { version = "0.8.0", path = "../pingora-runtime" }
pingora-openssl = { version = "0.8.0", path = "../pingora-openssl", optional = true }
pingora-boringssl = { version = "0.8.0", path = "../pingora-boringssl", optional = true }
pingora-pool = { version = "0.8.0", path = "../pingora-pool" }
pingora-error = { version = "0.8.0", path = "../pingora-error" }
pingora-timeout = { version = "0.8.0", path = "../pingora-timeout" }
pingora-http = { version = "0.8.0", path = "../pingora-http" }
pingora-rustls = { version = "0.8.0", path = "../pingora-rustls", optional = true }
pingora-s2n = { version = "0.8.0", path = "../pingora-s2n", optional = true }
bstr = { workspace = true }
tokio = { workspace = true, features = ["net", "rt-multi-thread", "signal"] }
tokio-stream = { workspace = true }
futures = "0.3"
async-trait = { workspace = true }
httparse = { workspace = true }
bytes = { workspace = true }
http = { workspace = true }
log = { workspace = true }
h2 = { workspace = true }
derivative.workspace = true
clap = { version = "4.5", features = ["derive"] }
once_cell = { workspace = true }
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
strum = "0.26.2"
strum_macros = "0.26.2"
libc = "0.2.70"
chrono = { version = "~0.4.31", features = ["alloc"], default-features = false }
prometheus = "0.13"
sentry = { version = "0.36", features = [
    "backtrace",
    "contexts",
    "panic",
    "reqwest",
    "rustls",
], default-features = false, optional = true }
regex = "1"
percent-encoding = "2.1"
parking_lot = { version = "0.12", features = ["arc_lock"] }
socket2 = { version = ">=0.4, <1.0.0", features = ["all"] }
flate2 = { version = "1", features = ["zlib-ng"], default-features = false }
sfv = "0.10.4"
rand = "0.8"
ahash = { workspace = true }
unicase = "2"
brotli = "3"
openssl-probe = "0.1.6"
tokio-test = "0.4"
zstd = "0"
httpdate = "1"
x509-parser = { version = "0.16.0", optional = true }
ouroboros = { version = "0.18.4", optional = true }
lru = { workspace = true, optional = true }
daggy = "0.8"

[target.'cfg(unix)'.dependencies]
daemonize = "0.5.0"
nix = "~0.24.3"

[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.59.0", features = ["Win32_Networking_WinSock"] }

[dev-dependencies]
h2 = { workspace = true, features = ["unstable"] }
tokio-stream = { version = "0.1", features = ["full"] }
env_logger = "0.11"
reqwest = { version = "0.11", features = [
    "rustls-tls",
], default-features = false }
hyper = "0.14"
rstest = "0.23.0"
rustls = "0.23"

[target.'cfg(unix)'.dev-dependencies]
hyperlocal = "0.8"
jemallocator = "0.5"

[features]
default = []
openssl = ["pingora-openssl", "openssl_derived"]
boringssl = ["pingora-boringssl", "openssl_derived"]
rustls = ["pingora-rustls", "any_tls", "dep:x509-parser", "ouroboros"]
s2n = ["pingora-s2n", "any_tls", "dep:x509-parser", "ouroboros", "lru"]
patched_http1 = ["pingora-http/patched_http1"]
openssl_derived = ["any_tls"]
any_tls = []
sentry = ["dep:sentry"]
connection_filter = []


================================================
FILE: pingora-core/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-core/examples/bootstrap_as_a_service.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Example demonstrating how to start a server using [`Server::bootstrap_as_a_service`]
//! instead of calling [`Server::bootstrap`] directly.
//!
//! # Why `bootstrap_as_a_service`?
//!
//! [`Server::bootstrap`] runs the bootstrap phase synchronously before any services start.
//! This means the calling thread blocks during socket FD acquisition and Sentry initialization.
//!
//! [`Server::bootstrap_as_a_service`] instead schedules bootstrap as a dependency-aware init
//! service. This allows other services to declare a dependency on the bootstrap handle and
//! ensures they only start after bootstrap completes — while keeping setup fully asynchronous
//! and composable with the rest of the service graph.
//!
//! Use `bootstrap_as_a_service` when:
//! - You want to integrate bootstrap into the service dependency graph
//! - You want services to wait for bootstrap without blocking the main thread
//! - You are building more complex startup sequences (e.g. multiple ordered init steps)
//!
//! # Running the example
//!
//! ```bash
//! cargo run --example bootstrap_as_a_service --package pingora-core
//! ```
//!
//! # Expected behaviour
//!
//! Bootstrap runs as a service before `MyService` starts. `MyService` declares a dependency
//! on the bootstrap handle, so it will not be started until bootstrap has completed.

use async_trait::async_trait;
use log::info;
use pingora_core::server::configuration::Opt;
#[cfg(unix)]
use pingora_core::server::ListenFds;
use pingora_core::server::{Server, ShutdownWatch};
use pingora_core::services::Service;

/// A simple application service that requires bootstrap to be complete before it starts.
pub struct MyService;

#[async_trait]
impl Service for MyService {
    async fn start_service(
        &mut self,
        #[cfg(unix)] _fds: Option<ListenFds>,
        mut shutdown: ShutdownWatch,
        _listeners_per_fd: usize,
    ) {
        info!("MyService: bootstrap is complete, starting up");

        // Keep running until a shutdown signal is received.
        shutdown.changed().await.ok();

        info!("MyService: shutting down");
    }

    fn name(&self) -> &str {
        "my_service"
    }

    fn threads(&self) -> Option<usize> {
        Some(1)
    }
}

fn main() {
    env_logger::Builder::from_default_env()
        .filter_level(log::LevelFilter::Info)
        .init();

    let opt = Opt::parse_args();
    let mut server = Server::new(Some(opt)).unwrap();

    // Schedule bootstrap as a service instead of calling server.bootstrap() directly.
    // The returned handle can be used to declare dependencies so that other services
    // only start after bootstrap has finished.
    let bootstrap_handle = server.bootstrap_as_a_service();

    // Register our application service and get its handle.
    let service_handle = server.add_service(MyService);

    // MyService will not start until the bootstrap service has signaled that it is ready.
    service_handle.add_dependency(&bootstrap_handle);

    info!("Starting server — bootstrap will run as a service before MyService starts");

    server.run_forever();
}


================================================
FILE: pingora-core/examples/client_cert.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![cfg_attr(not(feature = "openssl"), allow(unused))]

use std::any::Any;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::sync::Arc;

use async_trait::async_trait;
use clap::Parser;
use http::header::{CONTENT_LENGTH, CONTENT_TYPE};
use http::{Response, StatusCode};
use pingora_core::apps::http_app::ServeHttp;
use pingora_core::listeners::tls::TlsSettings;
use pingora_core::listeners::TlsAccept;
use pingora_core::protocols::http::ServerSession;
use pingora_core::protocols::tls::TlsRef;
use pingora_core::server::configuration::Opt;
use pingora_core::server::Server;
use pingora_core::services::listening::Service;
use pingora_core::Result;
#[cfg(feature = "openssl")]
use pingora_openssl::{
    nid::Nid,
    ssl::{NameType, SslFiletype, SslVerifyMode},
    x509::{GeneralName, X509Name},
};

// Custom structure to hold TLS information
struct MyTlsInfo {
    // SNI (Server Name Indication) from the TLS handshake
    sni: Option<String>,
    // SANs (Subject Alternative Names) from client certificate
    sans: Vec<String>,
    // Common Name (CN) from client certificate
    common_name: Option<String>,
}

struct MyApp;

#[async_trait]
impl ServeHttp for MyApp {
    async fn response(&self, session: &mut ServerSession) -> http::Response<Vec<u8>> {
        static EMPTY_VEC: Vec<String> = vec![];

        // Extract TLS info from the session's digest extensions
        let my_tls_info = session
            .digest()
            .and_then(|digest| digest.ssl_digest.as_ref())
            .and_then(|ssl_digest| ssl_digest.extension.get::<MyTlsInfo>());
        let sni = my_tls_info
            .and_then(|my_tls_info| my_tls_info.sni.as_deref())
            .unwrap_or("<none>");
        let sans = my_tls_info
            .map(|my_tls_info| &my_tls_info.sans)
            .unwrap_or(&EMPTY_VEC);
        let common_name = my_tls_info
            .and_then(|my_tls_info| my_tls_info.common_name.as_deref())
            .unwrap_or("<none>");

        // Create response message
        let mut message = String::new();
        message += &format!("Your SNI was: {sni}\n");
        message += &format!("Your SANs were: {sans:?}\n");
        message += &format!("Client Common Name (CN): {}\n", common_name);
        let message = message.into_bytes();

        Response::builder()
            .status(StatusCode::OK)
            .header(CONTENT_TYPE, "text/plain")
            .header(CONTENT_LENGTH, message.len())
            .body(message)
            .unwrap()
    }
}

struct MyTlsCallbacks;

#[async_trait]
impl TlsAccept for MyTlsCallbacks {
    #[cfg(feature = "openssl")]
    async fn handshake_complete_callback(
        &self,
        tls_ref: &TlsRef,
    ) -> Option<Arc<dyn Any + Send + Sync>> {
        // Here you can inspect the TLS connection and return an extension if needed.

        // Extract SNI (Server Name Indication)
        let sni = tls_ref
            .servername(NameType::HOST_NAME)
            .map(ToOwned::to_owned);

        // Extract SAN (Subject Alternative Names) from the client certificate
        let sans = tls_ref
            .peer_certificate()
            .and_then(|cert| cert.subject_alt_names())
            .map_or(vec![], |sans| {
                sans.into_iter()
                    .filter_map(|san| san_to_string(&san))
                    .collect::<Vec<_>>()
            });

        // Extract Common Name (CN) from the client certificate
        let common_name = tls_ref.peer_certificate().and_then(|cert| {
            let cn = cert.subject_name().entries_by_nid(Nid::COMMONNAME).next()?;
            Some(cn.data().as_utf8().ok()?.to_string())
        });

        let tls_info = MyTlsInfo {
            sni,
            sans,
            common_name,
        };
        Some(Arc::new(tls_info))
    }
}

// Convert GeneralName of SAN to String representation
#[cfg(feature = "openssl")]
fn san_to_string(san: &GeneralName) -> Option<String> {
    if let Some(dnsname) = san.dnsname() {
        return Some(dnsname.to_owned());
    }
    if let Some(uri) = san.uri() {
        return Some(uri.to_owned());
    }
    if let Some(email) = san.email() {
        return Some(email.to_owned());
    }
    if let Some(ip) = san.ipaddress() {
        return bytes_to_ip_addr(ip).map(|addr| addr.to_string());
    }
    None
}

// Convert byte slice to IpAddr
fn bytes_to_ip_addr(bytes: &[u8]) -> Option<IpAddr> {
    match bytes.len() {
        4 => {
            let addr = Ipv4Addr::new(bytes[0], bytes[1], bytes[2], bytes[3]);
            Some(IpAddr::V4(addr))
        }
        16 => {
            let mut octets = [0u8; 16];
            octets.copy_from_slice(bytes);
            let addr = Ipv6Addr::from(octets);
            Some(IpAddr::V6(addr))
        }
        _ => None,
    }
}

// This example demonstrates an HTTP server that requires client certificates.
// The server extracts the SNI (Server Name Indication) from the TLS handshake and
// SANs (Subject Alternative Names) from the client certificate, then returns them
// as part of the HTTP response.
//
// ## How to run
//
//   cargo run -F openssl --example client_cert
//
//   # In another terminal, run the following command to test the server:
//   cd pingora-core
//   curl -k -i \
//     --cert examples/keys/clients/cert-1.pem --key examples/keys/clients/key-1.pem \
//     --resolve myapp.example.com:6196:127.0.0.1 \
//     https://myapp.example.com:6196/
//   curl -k -i \
//     --cert examples/keys/clients/cert-2.pem --key examples/keys/clients/key-2.pem \
//     --resolve myapp.example.com:6196:127.0.0.1 \
//     https://myapp.example.com:6196/
//   curl -k -i \
//     --cert examples/keys/clients/invalid-cert.pem --key examples/keys/clients/invalid-key.pem \
//     --resolve myapp.example.com:6196:127.0.0.1 \
//     https://myapp.example.com:6196/
#[cfg(feature = "openssl")]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse();
    let mut my_server = Server::new(Some(opt))?;
    my_server.bootstrap();

    let mut my_app = Service::new("my app".to_owned(), MyApp);

    // Paths to server certificate, private key, and client CA certificate
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let server_cert_path = format!("{manifest_dir}/examples/keys/server/cert.pem");
    let server_key_path = format!("{manifest_dir}/examples/keys/server/key.pem");
    let client_ca_path = format!("{manifest_dir}/examples/keys/client-ca/cert.pem");

    // Create TLS settings with callbacks
    let callbacks = Box::new(MyTlsCallbacks);
    let mut tls_settings = TlsSettings::with_callbacks(callbacks)?;
    // Set server certificate and private key
    tls_settings.set_certificate_chain_file(&server_cert_path)?;
    tls_settings.set_private_key_file(server_key_path, SslFiletype::PEM)?;
    // Require client certificate
    tls_settings.set_verify(SslVerifyMode::PEER | SslVerifyMode::FAIL_IF_NO_PEER_CERT);
    // Set CA for client certificate verification
    tls_settings.set_ca_file(&client_ca_path)?;
    // Optionally, set the list of acceptable client CAs sent to the client
    tls_settings.set_client_ca_list(X509Name::load_client_ca_file(&client_ca_path)?);

    my_app.add_tls_with_settings("0.0.0.0:6196", None, tls_settings);
    my_server.add_service(my_app);

    my_server.run_forever();
}

#[cfg(not(feature = "openssl"))]
fn main() {
    eprintln!("This example requires the 'openssl' feature to be enabled.");
}


================================================
FILE: pingora-core/examples/keys/client-ca/cert.pem
================================================
-----BEGIN CERTIFICATE-----
MIICTjCCAfWgAwIBAgIULuUoq/di4EKmLyN0YwAkd6MQjv4wCgYIKoZIzj0EAwIw
dTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDVNh
biBGcmFuY2lzY28xGDAWBgNVBAoMD0Nsb3VkZmxhcmUsIEluYzEfMB0GA1UEAwwW
RXhhbXBsZSBDbGllbnQgUm9vdCBDQTAeFw0yNTExMTkwNDU5MjRaFw0zNTExMTcw
NDU5MjRaMHUxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYD
VQQHDA1TYW4gRnJhbmNpc2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxHzAd
BgNVBAMMFkV4YW1wbGUgQ2xpZW50IFJvb3QgQ0EwWTATBgcqhkjOPQIBBggqhkjO
PQMBBwNCAARxcxOAR4zUDPilKpMLiBzNs+HxdW6ZBlHVA7/0VyJtSPw03IdlbtFs
FhgcIa8uQ9nrppHlrzploTA7cg7YWUoso2MwYTAPBgNVHRMBAf8EBTADAQH/MA4G
A1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUL6S83l9AGZmmwHh+64YlUtMQzZcwHwYD
VR0jBBgwFoAUL6S83l9AGZmmwHh+64YlUtMQzZcwCgYIKoZIzj0EAwIDRwAwRAIg
cohFQxG22J2YKw+DGAidU5u3mxtB/BALxIusqd+OfFUCIGmT2GHVxz1FwK2pJrM1
FTWEcEbAw3r86iIVJBYP4qX6
-----END CERTIFICATE-----


================================================
FILE: pingora-core/examples/keys/client-ca/key.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIJOxEQowpYL5VLNf+qaCEBhic8e26UyR0ku65Sk6gjMIoAoGCCqGSM49
AwEHoUQDQgAEcXMTgEeM1Az4pSqTC4gczbPh8XVumQZR1QO/9FcibUj8NNyHZW7R
bBYYHCGvLkPZ66aR5a86ZaEwO3IO2FlKLA==
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-core/examples/keys/clients/cert-1.pem
================================================
-----BEGIN CERTIFICATE-----
MIICjjCCAjWgAwIBAgIUYUSqEzxm/oebfxxQmZEesZL2WFAwCgYIKoZIzj0EAwIw
dTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDVNh
biBGcmFuY2lzY28xGDAWBgNVBAoMD0Nsb3VkZmxhcmUsIEluYzEfMB0GA1UEAwwW
RXhhbXBsZSBDbGllbnQgUm9vdCBDQTAeFw0yNTExMTkwNTEyMThaFw0zNTExMTcw
NTEyMThaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYD
VQQHDA1TYW4gRnJhbmNpc2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxGTAX
BgNVBAMMEGV4YW1wbGUtY2xpZW50LTEwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNC
AATDe6hBwpmE4Jt//sIWGWuBDYXHezVoFeoHsDzcWo6RwyHDfm7lvnACmqWAdRUV
1GA7yfkzc1CaTqnvU8GjFdfXo4GoMIGlMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/
BAQDAgWgMBMGA1UdJQQMMAoGCCsGAQUFBwMCMDAGA1UdEQQpMCeGJXNwaWZmZTov
L2V4YW1wbGUuY29tL2V4YW1wbGUtY2xpZW50LTEwHQYDVR0OBBYEFAjfTzgX+AVh
M+BIaU0qTgINZWOdMB8GA1UdIwQYMBaAFC+kvN5fQBmZpsB4fuuGJVLTEM2XMAoG
CCqGSM49BAMCA0cAMEQCIHyJDCvYKgxVthHcLjlEGW4Pj0Y7XnQUCJARa3jAUTd9
AiB8tSXbo6J6Jhy6nasaxT1HAZwjgMVQwdo8O8UYOXXZpQ==
-----END CERTIFICATE-----


================================================
FILE: pingora-core/examples/keys/clients/cert-2.pem
================================================
-----BEGIN CERTIFICATE-----
MIIC0zCCAnmgAwIBAgIUVQlGCD9Zryvkh9G8GZXFBa2L9kQwCgYIKoZIzj0EAwIw
dTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDVNh
biBGcmFuY2lzY28xGDAWBgNVBAoMD0Nsb3VkZmxhcmUsIEluYzEfMB0GA1UEAwwW
RXhhbXBsZSBDbGllbnQgUm9vdCBDQTAeFw0yNTExMTkwODA5MDlaFw0zNTExMTcw
ODA5MDlaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYD
VQQHDA1TYW4gRnJhbmNpc2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxGTAX
BgNVBAMMEGV4YW1wbGUtY2xpZW50LTIwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNC
AAS2J10rq5Rt4TjhqEjHED0UPdceuzHUcw8doLC4StBIxJIrFk9Ag0g5ti9vN4fG
kK6J11GXk/pBmu3O3s48Gsfgo4HsMIHpMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/
BAQDAgWgMBMGA1UdJQQMMAoGCCsGAQUFBwMCMHQGA1UdEQRtMGuGJXNwaWZmZTov
L2V4YW1wbGUuY29tL2V4YW1wbGUtY2xpZW50LTKCFGNsaWVudC0yLmV4YW1wbGUu
Y29thwR/AAABhxAAAAAAAAAAAAAAAAAAAAABgRRjbGllbnQtMkBleGFtcGxlLmNv
bTAdBgNVHQ4EFgQUGHwnr7Ube1hqsodgcxJkfYuCKE8wHwYDVR0jBBgwFoAUL6S8
3l9AGZmmwHh+64YlUtMQzZcwCgYIKoZIzj0EAwIDSAAwRQIgK4JL1OO2nB7MqvGW
y2nbH4yYMu2jUkYhw9HFLUG2B6MCIQC4iDWKXp7R977LvuaaQaNcMmbGysrmfo8V
wOmp1JGOtA==
-----END CERTIFICATE-----


================================================
FILE: pingora-core/examples/keys/clients/invalid-cert.pem
================================================
-----BEGIN CERTIFICATE-----
MIICjzCCAjWgAwIBAgIUHYIVFYFooGVi2bNlk5R6GsbDKqUwCgYIKoZIzj0EAwIw
dTELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDVNh
biBGcmFuY2lzY28xGDAWBgNVBAoMD0Nsb3VkZmxhcmUsIEluYzEfMB0GA1UEAwwW
RXhhbXBsZSBDbGllbnQgUm9vdCBDQTAeFw0yNTExMTkwODEzNDJaFw0zNTExMTcw
ODEzNDJaMG8xCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYD
VQQHDA1TYW4gRnJhbmNpc2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxGTAX
BgNVBAMMEGV4YW1wbGUtY2xpZW50LTMwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNC
AATGKppMkUDsNvpzPPPiKmz53bbyIJPemIq5OdgJli8XZUFozxroJuFKhUuJOuFF
Jns2pzLHewIDzFXgErPqPxA/o4GoMIGlMAwGA1UdEwEB/wQCMAAwDgYDVR0PAQH/
BAQDAgWgMBMGA1UdJQQMMAoGCCsGAQUFBwMCMDAGA1UdEQQpMCeGJXNwaWZmZTov
L2V4YW1wbGUuY29tL2V4YW1wbGUtY2xpZW50LTMwHQYDVR0OBBYEFDV/v0zsiC/t
aomzxKa0jJ4SlmSzMB8GA1UdIwQYMBaAFK04aCtyumAb4PEMnh9OXLW7EIJSMAoG
CCqGSM49BAMCA0gAMEUCIH/wxvS0ae8DF1QteE+2FDOd/G2WeBMjsS8A6VyebAru
AiEAl2vjq0KePvM2X0jTZ/+RMJO33HOpYr0+PZw6FAa+aaw=
-----END CERTIFICATE-----


================================================
FILE: pingora-core/examples/keys/clients/invalid-key.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIFyLneOGHgjTBS8I2GB8kF0LHgDS/eTJBSDNS4PAkJ0JoAoGCCqGSM49
AwEHoUQDQgAExiqaTJFA7Db6czzz4ips+d228iCT3piKuTnYCZYvF2VBaM8a6Cbh
SoVLiTrhRSZ7Nqcyx3sCA8xV4BKz6j8QPw==
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-core/examples/keys/clients/key-1.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIFNioASifzPy0Fcp+qmMoMUhFOJGLki20ygISqZb+HY1oAoGCCqGSM49
AwEHoUQDQgAEw3uoQcKZhOCbf/7CFhlrgQ2Fx3s1aBXqB7A83FqOkcMhw35u5b5w
ApqlgHUVFdRgO8n5M3NQmk6p71PBoxXX1w==
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-core/examples/keys/clients/key-2.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEICd8DwjvpvE6nIKKKH2smrnLBM5zQyIkAKwBCiiRZGGsoAoGCCqGSM49
AwEHoUQDQgAEtiddK6uUbeE44ahIxxA9FD3XHrsx1HMPHaCwuErQSMSSKxZPQINI
ObYvbzeHxpCuiddRl5P6QZrtzt7OPBrH4A==
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-core/examples/keys/server/cert.pem
================================================
-----BEGIN CERTIFICATE-----
MIICVzCCAf6gAwIBAgIUYGbx/r4kY40a+zNq7IW/1lsvzk0wCgYIKoZIzj0EAwIw
bDELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDVNh
biBGcmFuY2lzY28xGDAWBgNVBAoMD0Nsb3VkZmxhcmUsIEluYzEWMBQGA1UEAwwN
b3BlbnJ1c3R5Lm9yZzAeFw0yNTExMTkwNDUxMzdaFw0zNTExMTcwNDUxMzdaMGwx
CzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYDVQQHDA1TYW4g
RnJhbmNpc2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxFjAUBgNVBAMMDW9w
ZW5ydXN0eS5vcmcwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAAT9EuNEw3e3syHW
SNnyJw7QVtOzDlILlt6F+jXT8UMBoMn4OnwC7AFlV8XzR9UpYSf1yq7Raps7c8TU
W9YF6ee4o34wfDAdBgNVHQ4EFgQU6B2YXLmWaboIZsf9YOCePRQXrO4wHwYDVR0j
BBgwFoAU6B2YXLmWaboIZsf9YOCePRQXrO4wDwYDVR0TAQH/BAUwAwEB/zApBgNV
HREEIjAggg8qLm9wZW5ydXN0eS5vcmeCDW9wZW5ydXN0eS5vcmcwCgYIKoZIzj0E
AwIDRwAwRAIgcSThJ5CWjuyWKfHbR+RuJ/9DtH1ag/47OolMQAvOczsCIDKVgPO/
A69bTOk4sq0y92YBBbe3hF82KrsgTR3nlkKF
-----END CERTIFICATE-----


================================================
FILE: pingora-core/examples/keys/server/key.pem
================================================
-----BEGIN PRIVATE KEY-----
MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgTAnVhDuKvV5epzX4
uuC8kEZL2vUPI49gUmS5kM+j5VWhRANCAAT9EuNEw3e3syHWSNnyJw7QVtOzDlIL
lt6F+jXT8UMBoMn4OnwC7AFlV8XzR9UpYSf1yq7Raps7c8TUW9YF6ee4
-----END PRIVATE KEY-----


================================================
FILE: pingora-core/examples/service_dependencies.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Example demonstrating service dependency management.
//!
//! This example shows how services can declare dependencies on other services using
//! a fluent API with [`ServiceHandle`] references, ensuring they start in the correct
//! order and wait for dependencies to be ready.
//!
//! # Running the example
//!
//! ```bash
//! cargo run --example service_dependencies --package pingora-core
//! ```
//!
//! Expected output:
//! - DatabaseService starts and initializes (takes 2 seconds)
//! - CacheService starts and initializes (takes 1 second)
//! - ApiService waits for both dependencies, then starts
//!
//! # Key Features Demonstrated
//!
//! - Fluent API for declaring dependencies via [`ServiceHandle::add_dependency()`]
//! - Type-safe dependency declaration (no strings)
//! - Multiple ways to implement services based on readiness needs:
//!   - **DatabaseService**: Custom readiness timing (uses `ServiceWithDependencies`)
//!   - **CacheService**: Ready immediately (uses `Service`)
//!   - **ApiService**: Ready immediately (uses `Service`)
//! - Automatic dependency ordering and validation
//! - Prevention of typos in service names (compile-time safety)

use async_trait::async_trait;
use log::info;
use pingora_core::server::configuration::Opt;
#[cfg(unix)]
use pingora_core::server::ListenFds;
use pingora_core::server::{Server, ShutdownWatch};
use pingora_core::services::{Service, ServiceWithDependents};
// DatabaseService needs to control readiness timing
use pingora_core::services::ServiceReadyNotifier;
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::{sleep, Duration};

/// A custom service that delays signaling ready until initialization is complete
pub struct DatabaseService {
    connection_string: Arc<Mutex<Option<String>>>,
}

impl DatabaseService {
    fn new() -> Self {
        Self {
            connection_string: Arc::new(Mutex::new(None)),
        }
    }

    fn get_connection_string(&self) -> Arc<Mutex<Option<String>>> {
        self.connection_string.clone()
    }
}

#[async_trait]
impl ServiceWithDependents for DatabaseService {
    async fn start_service(
        &mut self,
        #[cfg(unix)] _fds: Option<ListenFds>,
        mut shutdown: ShutdownWatch,
        _listeners_per_fd: usize,
        ready_notifier: ServiceReadyNotifier,
    ) {
        info!("DatabaseService: Starting initialization...");

        // Simulate database connection setup
        sleep(Duration::from_secs(2)).await;

        // Store the connection string
        {
            let mut conn = self.connection_string.lock().await;
            *conn = Some("postgresql://localhost:5432/mydb".to_string());
        }

        info!("DatabaseService: Initialization complete, signaling ready");

        // Signal that the service is ready
        ready_notifier.notify_ready();

        // Keep running until shutdown
        shutdown.changed().await.ok();
        info!("DatabaseService: Shutting down");
    }

    fn name(&self) -> &str {
        "database"
    }

    fn threads(&self) -> Option<usize> {
        Some(1)
    }
}

/// A cache service that uses the simplified API
/// Signals ready immediately (using default implementation)
pub struct CacheService;

#[async_trait]
impl Service for CacheService {
    // Uses default start_service implementation which signals ready immediately

    async fn start_service(
        &mut self,
        #[cfg(unix)] _fds: Option<ListenFds>,
        mut shutdown: ShutdownWatch,
        _listeners_per_fd: usize,
    ) {
        info!("CacheService: Starting (ready immediately)...");

        // Simulate cache warmup
        sleep(Duration::from_secs(1)).await;
        info!("CacheService: Warmup complete");

        // Keep running until shutdown
        shutdown.changed().await.ok();
        info!("CacheService: Shutting down");
    }

    fn name(&self) -> &str {
        "cache"
    }

    fn threads(&self) -> Option<usize> {
        Some(1)
    }
}

/// An API service that depends on both database and cache
/// Uses the simplest API - signals ready immediately and just implements [Service]
pub struct ApiService {
    db_connection: Arc<Mutex<Option<String>>>,
}

impl ApiService {
    fn new(db_connection: Arc<Mutex<Option<String>>>) -> Self {
        Self { db_connection }
    }
}

#[async_trait]
impl Service for ApiService {
    // Uses default start_service - signals ready immediately

    async fn start_service(
        &mut self,
        #[cfg(unix)] _fds: Option<ListenFds>,
        mut shutdown: ShutdownWatch,
        _listeners_per_fd: usize,
    ) {
        info!("ApiService: Starting (dependencies should be ready)...");

        // Verify database connection is available
        {
            let conn = self.db_connection.lock().await;
            if let Some(conn_str) = &*conn {
                info!("ApiService: Using database connection: {}", conn_str);
            } else {
                panic!("ApiService: Database connection not available!");
            }
        }

        info!("ApiService: Ready to serve requests");

        // Keep running until shutdown
        shutdown.changed().await.ok();
        info!("ApiService: Shutting down");
    }

    fn name(&self) -> &str {
        "api"
    }

    fn threads(&self) -> Option<usize> {
        Some(1)
    }
}

fn main() {
    env_logger::Builder::from_default_env()
        .filter_level(log::LevelFilter::Info)
        .init();

    info!("Starting server with service dependencies...");

    let opt = Opt::parse_args();
    let mut server = Server::new(Some(opt)).unwrap();
    server.bootstrap();

    // Create the database service
    let db_service = DatabaseService::new();
    let db_connection = db_service.get_connection_string();

    // Create services
    let cache_service = CacheService;
    let api_service = ApiService::new(db_connection);

    // Add services and get their handles
    let db_handle = server.add_service(db_service);
    let cache_handle = server.add_service(cache_service);
    let api_handle = server.add_service(api_service);

    // Declare dependencies using the fluent API
    // The API service will not start until both dependencies signal ready
    api_handle.add_dependency(db_handle);
    api_handle.add_dependency(&cache_handle);

    info!("Services configured. Starting server...");
    info!("Expected startup order:");
    info!("  1. database (will initialize for 2 seconds)");
    info!("  2. cache (will initialize for 1 second)");
    info!("  3. api (will wait for both, then start)");
    info!("");
    info!("Press Ctrl+C to shut down");

    server.run_forever();
}


================================================
FILE: pingora-core/src/apps/http_app.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! A simple HTTP application trait that maps a request to a response

use async_trait::async_trait;
use http::Response;
use log::{debug, error, trace};
use pingora_http::ResponseHeader;
use std::sync::Arc;

use crate::apps::{HttpPersistentSettings, HttpServerApp, HttpServerOptions, ReusedHttpStream};
use crate::modules::http::{HttpModules, ModuleBuilder};
use crate::protocols::http::v2::server::H2Options;
use crate::protocols::http::HttpTask;
use crate::protocols::http::ServerSession;
use crate::server::ShutdownWatch;

/// This trait defines how to map a request to a response
#[async_trait]
pub trait ServeHttp {
    /// Define the mapping from a request to a response.
    /// Note that the request header is already read, but the implementation needs to read the
    /// request body if any.
    ///
    /// # Limitation
    /// In this API, the entire response has to be generated before the end of this call.
    /// So it is not suitable for streaming response or interactive communications.
    /// Users need to implement their own [`super::HttpServerApp`] for those use cases.
    async fn response(&self, http_session: &mut ServerSession) -> Response<Vec<u8>>;
}

// TODO: remove this in favor of HttpServer?
#[async_trait]
impl<SV> HttpServerApp for SV
where
    SV: ServeHttp + Send + Sync,
{
    async fn process_new_http(
        self: &Arc<Self>,
        mut http: ServerSession,
        shutdown: &ShutdownWatch,
    ) -> Option<ReusedHttpStream> {
        match http.read_request().await {
            Ok(res) => match res {
                false => {
                    debug!("Failed to read request header");
                    return None;
                }
                true => {
                    debug!("Successfully get a new request");
                }
            },
            Err(e) => {
                error!("HTTP server fails to read from downstream: {e}");
                return None;
            }
        }
        trace!("{:?}", http.req_header());
        if *shutdown.borrow() {
            http.set_keepalive(None);
        } else {
            http.set_keepalive(Some(60));
        }
        let new_response = self.response(&mut http).await;
        let (parts, body) = new_response.into_parts();
        let resp_header: ResponseHeader = parts.into();
        match http.write_response_header(Box::new(resp_header)).await {
            Ok(()) => {
                debug!("HTTP response header done.");
            }
            Err(e) => {
                error!(
                    "HTTP server fails to write to downstream: {e}, {}",
                    http.request_summary()
                );
            }
        }
        if !body.is_empty() {
            // TODO: check if chunked encoding is needed
            match http.write_response_body(body.into(), true).await {
                Ok(_) => debug!("HTTP response written."),
                Err(e) => error!(
                    "HTTP server fails to write to downstream: {e}, {}",
                    http.request_summary()
                ),
            }
        }
        let persistent_settings = HttpPersistentSettings::for_session(&http);
        match http.finish().await {
            Ok(c) => c.map(|s| ReusedHttpStream::new(s, Some(persistent_settings))),
            Err(e) => {
                error!("HTTP server fails to finish the request: {e}");
                None
            }
        }
    }
}

/// A helper struct for HTTP server with http modules embedded
pub struct HttpServer<SV> {
    app: SV,
    modules: HttpModules,
    pub server_options: Option<HttpServerOptions>,
    pub h2_options: Option<H2Options>,
}

impl<SV> HttpServer<SV> {
    /// Create a new [HttpServer] with the given app which implements [ServeHttp]
    pub fn new_app(app: SV) -> Self {
        HttpServer {
            app,
            modules: HttpModules::new(),
            server_options: None,
            h2_options: None,
        }
    }

    /// Add [ModuleBuilder] to this [HttpServer]
    pub fn add_module(&mut self, module: ModuleBuilder) {
        self.modules.add_module(module)
    }
}

#[async_trait]
impl<SV> HttpServerApp for HttpServer<SV>
where
    SV: ServeHttp + Send + Sync,
{
    async fn process_new_http(
        self: &Arc<Self>,
        mut http: ServerSession,
        shutdown: &ShutdownWatch,
    ) -> Option<ReusedHttpStream> {
        match http.read_request().await {
            Ok(res) => match res {
                false => {
                    debug!("Failed to read request header");
                    return None;
                }
                true => {
                    debug!("Successfully get a new request");
                }
            },
            Err(e) => {
                error!("HTTP server fails to read from downstream: {e}");
                return None;
            }
        }
        trace!("{:?}", http.req_header());
        if *shutdown.borrow() {
            http.set_keepalive(None);
        } else {
            http.set_keepalive(Some(60));
        }
        let mut module_ctx = self.modules.build_ctx();
        let req = http.req_header_mut();
        module_ctx.request_header_filter(req).await.ok()?;
        let new_response = self.app.response(&mut http).await;
        let (parts, body) = new_response.into_parts();
        let mut resp_header: ResponseHeader = parts.into();
        module_ctx
            .response_header_filter(&mut resp_header, body.is_empty())
            .await
            .ok()?;

        let task = HttpTask::Header(Box::new(resp_header), body.is_empty());
        trace!("{task:?}");

        match http.response_duplex_vec(vec![task]).await {
            Ok(_) => {
                debug!("HTTP response header done.");
            }
            Err(e) => {
                error!(
                    "HTTP server fails to write to downstream: {e}, {}",
                    http.request_summary()
                );
            }
        }

        let mut body = Some(body.into());
        module_ctx.response_body_filter(&mut body, true).ok()?;

        let task = HttpTask::Body(body, true);

        trace!("{task:?}");

        // TODO: check if chunked encoding is needed
        match http.response_duplex_vec(vec![task]).await {
            Ok(_) => debug!("HTTP response written."),
            Err(e) => error!(
                "HTTP server fails to write to downstream: {e}, {}",
                http.request_summary()
            ),
        }
        let persistent_settings = HttpPersistentSettings::for_session(&http);
        match http.finish().await {
            Ok(c) => c.map(|s| ReusedHttpStream::new(s, Some(persistent_settings))),
            Err(e) => {
                error!("HTTP server fails to finish the request: {e}");
                None
            }
        }
    }

    fn h2_options(&self) -> Option<H2Options> {
        self.h2_options.clone()
    }

    fn server_options(&self) -> Option<&HttpServerOptions> {
        self.server_options.as_ref()
    }
}


================================================
FILE: pingora-core/src/apps/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The abstraction and implementation interface for service application logic

pub mod http_app;
pub mod prometheus_http_app;

use crate::server::ShutdownWatch;
use async_trait::async_trait;
use log::{debug, error};
use std::future::poll_fn;
use std::sync::Arc;

use crate::protocols::http::v2::server;
use crate::protocols::http::ServerSession;
use crate::protocols::Digest;
use crate::protocols::Stream;
use crate::protocols::ALPN;

// https://datatracker.ietf.org/doc/html/rfc9113#section-3.4
const H2_PREFACE: &[u8] = b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n";

#[async_trait]
/// This trait defines the interface of a transport layer (TCP or TLS) application.
pub trait ServerApp {
    /// Whenever a new connection is established, this function will be called with the established
    /// [`Stream`] object provided.
    ///
    /// The application can do whatever it wants with the `session`.
    ///
    /// After processing the `session`, if the `session`'s connection is reusable, This function
    /// can return it to the service by returning `Some(session)`. The returned `session` will be
    /// fed to another [`Self::process_new()`] for another round of processing.
    /// If not reusable, `None` should be returned.
    ///
    /// The `shutdown` argument will change from `false` to `true` when the server receives a
    /// signal to shutdown. This argument allows the application to react accordingly.
    async fn process_new(
        self: &Arc<Self>,
        mut session: Stream,
        // TODO: make this ShutdownWatch so that all task can await on this event
        shutdown: &ShutdownWatch,
    ) -> Option<Stream>;

    /// This callback will be called once after the service stops listening to its endpoints.
    async fn cleanup(&self) {}
}
#[non_exhaustive]
#[derive(Default)]
/// HTTP Server options that control how the server handles some transport types.
pub struct HttpServerOptions {
    /// Allow HTTP/2 for plaintext.
    pub h2c: bool,

    /// Allow proxying CONNECT requests when handling HTTP traffic.
    ///
    /// When disabled, CONNECT requests are rejected with 405 by proxy services.
    pub allow_connect_method_proxying: bool,

    #[doc(hidden)]
    pub force_custom: bool,

    /// Maximum number of requests that this connection will handle. This is
    /// equivalent to [Nginx's keepalive requests](https://nginx.org/en/docs/http/ngx_http_upstream_module.html#keepalive_requests)
    /// which says:
    ///
    /// > Closing connections periodically is necessary to free per-connection
    /// > memory allocations. Therefore, using too high maximum number of
    /// > requests could result in excessive memory usage and not recommended.
    ///
    /// Unlike nginx, the default behavior here is _no limit_.
    pub keepalive_request_limit: Option<u32>,
}

#[derive(Debug, Clone)]
pub struct HttpPersistentSettings {
    keepalive_timeout: Option<u64>,
    keepalive_reuses_remaining: Option<u32>,
}

impl HttpPersistentSettings {
    pub fn for_session(session: &ServerSession) -> Self {
        HttpPersistentSettings {
            keepalive_timeout: session.get_keepalive(),
            keepalive_reuses_remaining: session.get_keepalive_reuses_remaining(),
        }
    }

    pub fn apply_to_session(self, session: &mut ServerSession) {
        let Self {
            keepalive_timeout,
            mut keepalive_reuses_remaining,
        } = self;

        // Reduce the number of times the connection for this session can be
        // reused by one. A session with reuse count of zero won't be reused
        if let Some(reuses) = keepalive_reuses_remaining.as_mut() {
            *reuses = reuses.saturating_sub(1);
        }

        session.set_keepalive(keepalive_timeout);
        session.set_keepalive_reuses_remaining(keepalive_reuses_remaining);
    }
}

#[derive(Debug)]
pub struct ReusedHttpStream {
    stream: Stream,
    persistent_settings: Option<HttpPersistentSettings>,
}

impl ReusedHttpStream {
    pub fn new(stream: Stream, persistent_settings: Option<HttpPersistentSettings>) -> Self {
        ReusedHttpStream {
            stream,
            persistent_settings,
        }
    }

    pub fn consume(self) -> (Stream, Option<HttpPersistentSettings>) {
        (self.stream, self.persistent_settings)
    }
}

/// This trait defines the interface of an HTTP application.
#[async_trait]
pub trait HttpServerApp {
    /// Similar to the [`ServerApp`], this function is called whenever a new HTTP session is established.
    ///
    /// After successful processing, [`ServerSession::finish()`] can be called to return an optionally reusable
    /// connection back to the service. The caller needs to make sure that the connection is in a reusable state
    /// i.e., no error or incomplete read or write headers or bodies. Otherwise a `None` should be returned.
    async fn process_new_http(
        self: &Arc<Self>,
        mut session: ServerSession,
        // TODO: make this ShutdownWatch so that all task can await on this event
        shutdown: &ShutdownWatch,
    ) -> Option<ReusedHttpStream>;

    /// Provide options on how HTTP/2 connection should be established. This function will be called
    /// every time a new HTTP/2 **connection** needs to be established.
    ///
    /// A `None` means to use the built-in default options. See [`server::H2Options`] for more details.
    fn h2_options(&self) -> Option<server::H2Options> {
        None
    }

    /// Provide HTTP server options used to override default behavior. This function will be called
    /// every time a new connection is processed.
    ///
    /// A `None` means no server options will be applied.
    fn server_options(&self) -> Option<&HttpServerOptions> {
        None
    }

    async fn http_cleanup(&self) {}

    #[doc(hidden)]
    async fn process_custom_session(
        self: Arc<Self>,
        _stream: Stream,
        _shutdown: &ShutdownWatch,
    ) -> Option<Stream> {
        None
    }
}

#[async_trait]
impl<T> ServerApp for T
where
    T: HttpServerApp + Send + Sync + 'static,
{
    async fn process_new(
        self: &Arc<Self>,
        mut stream: Stream,
        shutdown: &ShutdownWatch,
    ) -> Option<Stream> {
        let mut h2c = self.server_options().as_ref().map_or(false, |o| o.h2c);
        let custom = self
            .server_options()
            .as_ref()
            .map_or(false, |o| o.force_custom);

        // try to read h2 preface
        if h2c && !custom {
            let mut buf = [0u8; H2_PREFACE.len()];
            let peeked = stream
                .try_peek(&mut buf)
                .await
                .map_err(|e| {
                    // this error is normal when h1 reuse and close the connection
                    debug!("Read error while peeking h2c preface {e}");
                    e
                })
                .ok()?;
            // not all streams support peeking
            if peeked {
                // turn off h2c (use h1) if h2 preface doesn't exist
                h2c = buf == H2_PREFACE;
            }
        }
        if h2c || matches!(stream.selected_alpn_proto(), Some(ALPN::H2)) {
            // create a shared connection digest
            let digest = Arc::new(Digest {
                ssl_digest: stream.get_ssl_digest(),
                // TODO: log h2 handshake time
                timing_digest: stream.get_timing_digest(),
                proxy_digest: stream.get_proxy_digest(),
                socket_digest: stream.get_socket_digest(),
            });

            let h2_options = self.h2_options();
            let h2_conn = server::handshake(stream, h2_options).await;
            let mut h2_conn = match h2_conn {
                Err(e) => {
                    error!("H2 handshake error {e}");
                    return None;
                }
                Ok(c) => c,
            };

            let mut shutdown = shutdown.clone();
            loop {
                // this loop ends when the client decides to close the h2 conn
                // TODO: add a timeout?
                let h2_stream = tokio::select! {
                    _ = shutdown.changed() => {
                        h2_conn.graceful_shutdown();
                        let _ = poll_fn(|cx| h2_conn.poll_closed(cx))
                            .await.map_err(|e| error!("H2 error waiting for shutdown {e}"));
                        return None;
                    }
                    h2_stream = server::HttpSession::from_h2_conn(&mut h2_conn, digest.clone()) => h2_stream
                };
                let h2_stream = match h2_stream {
                    Err(e) => {
                        // It is common for the client to just disconnect TCP without properly
                        // closing H2. So we don't log the errors here
                        debug!("H2 error when accepting new stream {e}");
                        return None;
                    }
                    Ok(s) => s?, // None means the connection is ready to be closed
                };
                let app = self.clone();
                let shutdown = shutdown.clone();
                pingora_runtime::current_handle().spawn(async move {
                    // Note, `PersistentSettings` not currently relevant for h2
                    app.process_new_http(ServerSession::new_http2(h2_stream), &shutdown)
                        .await;
                });
            }
        } else if custom || matches!(stream.selected_alpn_proto(), Some(ALPN::Custom(_))) {
            return self.clone().process_custom_session(stream, shutdown).await;
        } else {
            // No ALPN or ALPN::H1 and h2c was not configured, fallback to HTTP/1.1
            let mut session = ServerSession::new_http1(stream);
            if *shutdown.borrow() {
                // stop downstream from reusing if this service is shutting down soon
                session.set_keepalive(None);
            } else {
                // default 60s
                session.set_keepalive(Some(60));
            }
            session.set_keepalive_reuses_remaining(
                self.server_options()
                    .and_then(|opts| opts.keepalive_request_limit),
            );

            let mut result = self.process_new_http(session, shutdown).await;
            while let Some((stream, persistent_settings)) = result.map(|r| r.consume()) {
                let mut session = ServerSession::new_http1(stream);
                if let Some(persistent_settings) = persistent_settings {
                    persistent_settings.apply_to_session(&mut session);
                }

                result = self.process_new_http(session, shutdown).await;
            }
        }
        None
    }

    async fn cleanup(&self) {
        self.http_cleanup().await;
    }
}


================================================
FILE: pingora-core/src/apps/prometheus_http_app.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! An HTTP application that reports Prometheus metrics.

use async_trait::async_trait;
use http::Response;
use prometheus::{Encoder, TextEncoder};

use super::http_app::HttpServer;
use crate::apps::http_app::ServeHttp;
use crate::modules::http::compression::ResponseCompressionBuilder;
use crate::protocols::http::ServerSession;

/// An HTTP application that reports Prometheus metrics.
///
/// This application will report all the [static metrics](https://docs.rs/prometheus/latest/prometheus/index.html#static-metrics)
/// collected via the [Prometheus](https://docs.rs/prometheus/) crate;
pub struct PrometheusHttpApp;

#[async_trait]
impl ServeHttp for PrometheusHttpApp {
    async fn response(&self, _http_session: &mut ServerSession) -> Response<Vec<u8>> {
        let encoder = TextEncoder::new();
        let metric_families = prometheus::gather();
        let mut buffer = vec![];
        encoder.encode(&metric_families, &mut buffer).unwrap();
        Response::builder()
            .status(200)
            .header(http::header::CONTENT_TYPE, encoder.format_type())
            .header(http::header::CONTENT_LENGTH, buffer.len())
            .body(buffer)
            .unwrap()
    }
}

/// The [HttpServer] for [PrometheusHttpApp]
///
/// This type provides the functionality of [PrometheusHttpApp] with compression enabled
pub type PrometheusServer = HttpServer<PrometheusHttpApp>;

impl PrometheusServer {
    pub fn new() -> Self {
        let mut server = Self::new_app(PrometheusHttpApp);
        // enable gzip level 7 compression
        server.add_module(ResponseCompressionBuilder::enable(7));
        server
    }
}


================================================
FILE: pingora-core/src/connectors/http/custom/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use std::time::Duration;

use pingora_error::Result;

use crate::{
    protocols::{http::custom::client::Session, Stream},
    upstreams::peer::Peer,
};

// Either returns a Custom Session or the Stream for creating a new H1 session as a fallback.
pub enum Connection<S: Session> {
    Session(S),
    Stream(Stream),
}
#[doc(hidden)]
#[async_trait]
pub trait Connector: Send + Sync + Unpin + 'static {
    type Session: Session;

    async fn get_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        peer: &P,
    ) -> Result<(Connection<Self::Session>, bool)>;

    async fn reused_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        peer: &P,
    ) -> Option<Self::Session>;

    async fn release_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        mut session: Self::Session,
        peer: &P,
        idle_timeout: Option<Duration>,
    );
}

#[doc(hidden)]
#[async_trait]
impl Connector for () {
    type Session = ();

    async fn get_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        _peer: &P,
    ) -> Result<(Connection<Self::Session>, bool)> {
        unreachable!("connector: get_http_session")
    }

    async fn reused_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        _peer: &P,
    ) -> Option<Self::Session> {
        unreachable!("connector: reused_http_session")
    }

    async fn release_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        _session: Self::Session,
        _peer: &P,
        _idle_timeout: Option<Duration>,
    ) {
        unreachable!("connector: release_http_session")
    }
}


================================================
FILE: pingora-core/src/connectors/http/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Connecting to HTTP servers

use crate::connectors::http::custom::Connection;
use crate::connectors::ConnectorOptions;
use crate::listeners::ALPN;
use crate::protocols::http::client::HttpSession;
use crate::protocols::http::v1::client::HttpSession as Http1Session;
use crate::upstreams::peer::Peer;
use pingora_error::Result;
use std::time::Duration;

pub mod custom;
pub mod v1;
pub mod v2;

pub struct Connector<C = ()>
where
    C: custom::Connector,
{
    h1: v1::Connector,
    h2: v2::Connector,
    custom: C,
}

impl Connector<()> {
    pub fn new(options: Option<ConnectorOptions>) -> Self {
        Connector {
            h1: v1::Connector::new(options.clone()),
            h2: v2::Connector::new(options.clone()),
            custom: Default::default(),
        }
    }
}

impl<C> Connector<C>
where
    C: custom::Connector,
{
    pub fn new_custom(options: Option<ConnectorOptions>, custom: C) -> Self {
        Connector {
            h1: v1::Connector::new(options.clone()),
            h2: v2::Connector::new(options.clone()),
            custom,
        }
    }

    /// Get an [HttpSession] to the given server.
    ///
    /// The second return value indicates whether the session is connected via a reused stream.
    pub async fn get_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        peer: &P,
    ) -> Result<(HttpSession<C::Session>, bool)> {
        let peer_opts = peer.get_peer_options();

        // Switch to custom protocol as early as possible
        if peer_opts.is_some_and(|o| matches!(o.alpn, ALPN::Custom(_))) {
            // We create the Connector before TLS, so we need to make sure that the server also supports the same custom protocol.
            // We will first check for sessions that we can reuse, if not we will create a new one based on the negotiated protocol

            // Step 1: Look for reused Custom Session
            if let Some(session) = self.custom.reused_http_session(peer).await {
                return Ok((HttpSession::Custom(session), true));
            }
            // Step 2: Check reuse pool for reused H1 session
            if let Some(h1) = self.h1.reused_http_session(peer).await {
                return Ok((HttpSession::H1(h1), true));
            }
            // Step 3: Try and create a new Custom session
            let (connection, reused) = self.custom.get_http_session(peer).await?;
            // We create the Connector before TLS, so we need to make sure that the server also supports the same custom protocol
            match connection {
                Connection::Session(s) => {
                    return Ok((HttpSession::Custom(s), reused));
                }
                // Negotiated ALPN is not custom, create a new H1 session
                Connection::Stream(s) => {
                    return Ok((
                        HttpSession::H1(Http1Session::new_with_options(s, peer)),
                        false,
                    ));
                }
            }
        }

        // NOTE: maybe TODO: we do not yet enforce that only TLS traffic can use h2, which is the
        // de facto requirement for h2, because non TLS traffic lack the negotiation mechanism.

        // We assume no peer option == no ALPN == h1 only
        let h1_only = peer
            .get_peer_options()
            .is_none_or(|o| o.alpn.get_max_http_version() == 1);
        if h1_only {
            let (h1, reused) = self.h1.get_http_session(peer).await?;
            Ok((HttpSession::H1(h1), reused))
        } else {
            // the peer allows h2, we first check the h2 reuse pool
            let reused_h2 = self.h2.reused_http_session(peer).await?;
            if let Some(h2) = reused_h2 {
                return Ok((HttpSession::H2(h2), true));
            }
            let h2_only = peer
                .get_peer_options()
                .is_some_and(|o| o.alpn.get_min_http_version() == 2)
                && !self.h2.h1_is_preferred(peer);
            if !h2_only {
                // We next check the reuse pool for h1 before creating a new h2 connection.
                // This is because the server may not support h2 at all, connections to
                // the server could all be h1.
                if let Some(h1) = self.h1.reused_http_session(peer).await {
                    return Ok((HttpSession::H1(h1), true));
                }
            }
            let session = self.h2.new_http_session(peer).await?;
            Ok((session, false))
        }
    }

    pub async fn release_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        session: HttpSession<C::Session>,
        peer: &P,
        idle_timeout: Option<Duration>,
    ) {
        match session {
            HttpSession::H1(h1) => self.h1.release_http_session(h1, peer, idle_timeout).await,
            HttpSession::H2(h2) => self.h2.release_http_session(h2, peer, idle_timeout),
            HttpSession::Custom(c) => {
                self.custom
                    .release_http_session(c, peer, idle_timeout)
                    .await;
            }
        }
    }

    /// Tell the connector to always send h1 for ALPN for the given peer in the future.
    pub fn prefer_h1(&self, peer: &impl Peer) {
        self.h2.prefer_h1(peer);
    }
}

#[cfg(test)]
#[cfg(feature = "any_tls")]
mod tests {
    use super::*;
    use crate::connectors::TransportConnector;
    use crate::listeners::tls::TlsSettings;
    use crate::listeners::{Listeners, TransportStack, ALPN};
    use crate::protocols::http::v1::client::HttpSession as Http1Session;
    use crate::protocols::tls::CustomALPN;
    use crate::upstreams::peer::HttpPeer;
    use crate::upstreams::peer::PeerOptions;
    use async_trait::async_trait;
    use pingora_http::RequestHeader;
    use std::sync::Arc;
    use std::sync::Mutex;
    use tokio::io::AsyncWriteExt;
    use tokio::net::TcpListener;
    use tokio::task::JoinHandle;
    use tokio::time::sleep;

    async fn get_http(http: &mut Http1Session, expected_status: u16) {
        let mut req = Box::new(RequestHeader::build("GET", b"/", None).unwrap());
        req.append_header("Host", "one.one.one.one").unwrap();
        http.write_request_header(req).await.unwrap();
        http.read_response().await.unwrap();
        http.respect_keepalive();

        assert_eq!(http.get_status().unwrap(), expected_status);
        while http.read_body_bytes().await.unwrap().is_some() {}
    }

    #[tokio::test]
    async fn test_connect_h2() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(2, 2);
        let (h2, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);
        match &h2 {
            HttpSession::H1(_) => panic!("expect h2"),
            HttpSession::H2(h2_stream) => assert!(!h2_stream.ping_timedout()),
            HttpSession::Custom(_) => panic!("expect h2"),
        }

        connector.release_http_session(h2, &peer, None).await;

        let (h2, reused) = connector.get_http_session(&peer).await.unwrap();
        // reused this time
        assert!(reused);
        match &h2 {
            HttpSession::H1(_) => panic!("expect h2"),
            HttpSession::H2(h2_stream) => assert!(!h2_stream.ping_timedout()),
            HttpSession::Custom(_) => panic!("expect h2"),
        }
    }

    #[tokio::test]
    async fn test_connect_h1() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(1, 1);
        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);
        match &mut h1 {
            HttpSession::H1(http) => {
                get_http(http, 200).await;
            }
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
        connector.release_http_session(h1, &peer, None).await;

        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        // reused this time
        assert!(reused);
        match &mut h1 {
            HttpSession::H1(_) => {}
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
    }

    #[tokio::test]
    async fn test_connect_h2_fallback_h1_reuse() {
        // this test verify that if the server doesn't support h2, the Connector will reuse the
        // h1 session instead.

        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        // As it is hard to find a server that support only h1, we use the following hack to trick
        // the connector to think the server supports only h1. We force ALPN to use h1 and then
        // return the connection to the Connector. And then we use a Peer that allows h2
        peer.options.set_http_version(1, 1);
        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);
        match &mut h1 {
            HttpSession::H1(http) => {
                get_http(http, 200).await;
            }
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
        connector.release_http_session(h1, &peer, None).await;

        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(2, 1);

        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        // reused this time
        assert!(reused);
        match &mut h1 {
            HttpSession::H1(_) => {}
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
    }

    #[tokio::test]
    async fn test_connect_prefer_h1() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(2, 1);
        connector.prefer_h1(&peer);

        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);
        match &mut h1 {
            HttpSession::H1(http) => {
                get_http(http, 200).await;
            }
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
        connector.release_http_session(h1, &peer, None).await;

        peer.options.set_http_version(2, 2);
        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        // reused this time
        assert!(reused);
        match &mut h1 {
            HttpSession::H1(_) => {}
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
    }
    // Track the flow of calls when using a custom protocol. For this we need to create a Mock Connector
    struct MockConnector {
        transport: TransportConnector,
        reusable: Arc<Mutex<bool>>, // Mock for tracking reusable sessions
    }

    #[async_trait]
    impl custom::Connector for MockConnector {
        type Session = ();

        async fn get_http_session<P: Peer + Send + Sync + 'static>(
            &self,
            peer: &P,
        ) -> Result<(Connection<Self::Session>, bool)> {
            let (stream, _) = self.transport.get_stream(peer).await?;

            match stream.selected_alpn_proto() {
                Some(ALPN::Custom(_)) => Ok((custom::Connection::Session(()), false)),
                _ => Ok(((custom::Connection::Stream(stream)), false)),
            }
        }

        async fn reused_http_session<P: Peer + Send + Sync + 'static>(
            &self,
            _peer: &P,
        ) -> Option<Self::Session> {
            let mut flag = self.reusable.lock().unwrap();
            if *flag {
                *flag = false;
                Some(())
            } else {
                None
            }
        }

        async fn release_http_session<P: Peer + Send + Sync + 'static>(
            &self,
            _session: Self::Session,
            _peer: &P,
            _idle_timeout: Option<Duration>,
        ) {
            let mut flag = self.reusable.lock().unwrap();
            *flag = true;
        }
    }

    // Finds an available TCP port on localhost for test server setup.
    async fn get_available_port() -> u16 {
        TcpListener::bind("127.0.0.1:0")
            .await
            .unwrap()
            .local_addr()
            .unwrap()
            .port()
    }
    // Creates a test connector for integration/unit tests.
    // For rustls, only ConnectorOptions are used here; the actual dangerous verifier is patched in the TLS connector.
    fn create_test_connector() -> Connector<MockConnector> {
        #[cfg(feature = "rustls")]
        let custom_transport = {
            let options = ConnectorOptions::new(1);
            TransportConnector::new(Some(options))
        };
        #[cfg(not(feature = "rustls"))]
        let custom_transport = TransportConnector::new(None);
        Connector {
            h1: v1::Connector::new(None),
            h2: v2::Connector::new(None),
            custom: MockConnector {
                transport: custom_transport,
                reusable: Arc::new(Mutex::new(false)),
            },
        }
    }

    // Creates a test peer that uses a custom ALPN protocol and disables cert/hostname verification for tests.
    fn create_peer_with_custom_proto(port: u16, proto: &[u8]) -> HttpPeer {
        let mut peer = HttpPeer::new(("127.0.0.1", port), true, "localhost".into());
        let mut options = PeerOptions::new();
        options.alpn = ALPN::Custom(CustomALPN::new(proto.to_vec()));
        // Disable cert verification for this test (self-signed or invalid certs are OK)
        options.verify_cert = false;
        options.verify_hostname = false;
        peer.options = options;
        peer
    }
    async fn build_custom_tls_listener(port: u16, custom_alpn: CustomALPN) -> TransportStack {
        let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
        let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));
        let addr = format!("127.0.0.1:{}", port);
        let mut listeners = Listeners::new();
        let mut tls_settings = TlsSettings::intermediate(&cert_path, &key_path).unwrap();

        tls_settings.set_alpn(ALPN::Custom(custom_alpn));
        listeners.add_tls_with_settings(&addr, None, tls_settings);
        listeners
            .build(
                #[cfg(unix)]
                None,
            )
            .await
            .unwrap()
            .pop()
            .unwrap()
    }

    // Spawn a simple TLS Server
    fn spawn_test_tls_server(listener: TransportStack) -> JoinHandle<()> {
        tokio::spawn(async move {
            loop {
                let stream = match listener.accept().await {
                    Ok(stream) => stream,
                    Err(_) => break, // Exit if listener is closed
                };
                let mut stream = stream.handshake().await.unwrap();

                let _ = stream.write_all(b"CUSTOM").await; // Ignore write errors
            }
        })
    }

    // Both server and client are using the same custom protocol
    #[tokio::test]
    async fn test_custom_client_custom_upstream() {
        let port = get_available_port().await;
        let custom_protocol = b"custom".to_vec();

        let listener =
            build_custom_tls_listener(port, CustomALPN::new(custom_protocol.clone())).await;
        let server_handle = spawn_test_tls_server(listener);
        // Wait for server to start up
        sleep(Duration::from_millis(100)).await;

        let connector = create_test_connector();
        let peer = create_peer_with_custom_proto(port, &custom_protocol);

        // Check that the agreed ALPN is custom and matches the expected value
        if let Ok((stream, reused)) = connector.custom.transport.get_stream(&peer).await {
            assert!(!reused);
            match stream.selected_alpn_proto() {
                Some(ALPN::Custom(protocol)) => {
                    assert_eq!(
                        protocol.protocol(),
                        custom_protocol.as_slice(),
                        "Negotiated custom ALPN does not match expected value"
                    );
                }
                other => panic!("Expected custom ALPN, got {:?}", other),
            }
        } else {
            panic!("Should be able to create a stream");
        }

        let (custom, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);
        match custom {
            HttpSession::H1(_) => panic!("expect custom"),
            HttpSession::H2(_) => panic!("expect custom"),
            HttpSession::Custom(_) => {}
        }
        connector.release_http_session(custom, &peer, None).await;

        // Assert it returns a reused custom session this time
        let (custom, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(reused);
        match custom {
            HttpSession::H1(_) => panic!("expect custom"),
            HttpSession::H2(_) => panic!("expect custom"),
            HttpSession::Custom(_) => {}
        }

        // Kill the server task
        server_handle.abort();
        sleep(Duration::from_millis(100)).await;
    }

    // Both client and server are using custom protocols, but different ones - we should create H1 sessions as fallback.
    // For RusTLS if there is no agreed protocol, the handshake directly fails, so this won't work
    // TODO: If no ALPN is matched, rustls should return None instead of failing the handshake.
    #[cfg(not(feature = "rustls"))]
    #[tokio::test]
    async fn test_incompatible_custom_client_custom_upstream() {
        let port = get_available_port().await;
        let custom_protocol = b"custom".to_vec();

        let listener =
            build_custom_tls_listener(port, CustomALPN::new(b"different_custom".to_vec())).await;
        let server_handle = spawn_test_tls_server(listener);
        // Wait for server to start up
        sleep(Duration::from_millis(100)).await;

        let connector = create_test_connector();
        let peer = create_peer_with_custom_proto(port, &custom_protocol);

        // Verify that there is no agreed ALPN
        if let Ok((stream, reused)) = connector.custom.transport.get_stream(&peer).await {
            assert!(!reused);
            assert!(stream.selected_alpn_proto().is_none());
        } else {
            panic!("Should be able to create a stream");
        }

        let (h1, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);
        match h1 {
            HttpSession::H1(_) => {}
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
        // Not testing session reuse logic here as we haven't implemented it. Next test will test this.

        // Kill the server task
        server_handle.abort();
        sleep(Duration::from_millis(100)).await;
    }

    // Client thinks server is custom but server is not Custom. Should fallback to H1
    #[tokio::test]
    async fn test_custom_client_non_custom_upstream() {
        let custom_proto = b"custom".to_vec();
        let connector = create_test_connector();
        // Upstream supports H1 and H2
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        // Client sets upstream ALPN as custom protocol
        peer.options.alpn = ALPN::Custom(CustomALPN::new(custom_proto));

        // Verify that there is no agreed ALPN
        if let Ok((stream, reused)) = connector.custom.transport.get_stream(&peer).await {
            assert!(!reused);
            assert!(stream.selected_alpn_proto().is_none());
        } else {
            panic!("Should be able to create a stream");
        }

        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        // Assert it returns a new H1 session
        assert!(!reused);
        match &mut h1 {
            HttpSession::H1(http) => {
                get_http(http, 200).await;
            }
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
        connector.release_http_session(h1, &peer, None).await;

        // Assert it returns a reused h1 session this time
        let (mut h1, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(reused);
        match &mut h1 {
            HttpSession::H1(_) => {}
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
    }
}

// Used for disabling certificate/hostname verification in rustls for tests and custom ALPN/self-signed scenarios.
#[cfg(all(test, feature = "rustls"))]
pub mod rustls_no_verify {
    use rustls::client::danger::{ServerCertVerified, ServerCertVerifier};
    use rustls::pki_types::{CertificateDer, ServerName};
    use rustls::Error as TLSError;
    use std::sync::Arc;
    #[derive(Debug)]
    pub struct NoCertificateVerification;

    impl ServerCertVerifier for NoCertificateVerification {
        fn verify_server_cert(
            &self,
            _end_entity: &CertificateDer,
            _intermediates: &[CertificateDer],
            _server_name: &ServerName,
            _scts: &[u8],
            _now: rustls::pki_types::UnixTime,
        ) -> Result<ServerCertVerified, TLSError> {
            Ok(ServerCertVerified::assertion())
        }

        fn verify_tls12_signature(
            &self,
            _message: &[u8],
            _cert: &CertificateDer,
            _dss: &rustls::DigitallySignedStruct,
        ) -> Result<rustls::client::danger::HandshakeSignatureValid, TLSError> {
            Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
        }

        fn verify_tls13_signature(
            &self,
            _message: &[u8],
            _cert: &CertificateDer,
            _dss: &rustls::DigitallySignedStruct,
        ) -> Result<rustls::client::danger::HandshakeSignatureValid, TLSError> {
            Ok(rustls::client::danger::HandshakeSignatureValid::assertion())
        }

        fn supported_verify_schemes(&self) -> Vec<rustls::SignatureScheme> {
            vec![rustls::SignatureScheme::ECDSA_NISTP256_SHA256]
        }
    }

    pub fn apply_no_verify(config: &mut rustls::ClientConfig) {
        config
            .dangerous()
            .set_certificate_verifier(Arc::new(NoCertificateVerification));
    }
}


================================================
FILE: pingora-core/src/connectors/http/v1.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::connectors::{ConnectorOptions, TransportConnector};
use crate::protocols::http::v1::client::HttpSession;
use crate::upstreams::peer::Peer;

use pingora_error::Result;
use std::time::Duration;

pub struct Connector {
    transport: TransportConnector,
}

impl Connector {
    pub fn new(options: Option<ConnectorOptions>) -> Self {
        Connector {
            transport: TransportConnector::new(options),
        }
    }

    pub async fn get_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        peer: &P,
    ) -> Result<(HttpSession, bool)> {
        let (stream, reused) = self.transport.get_stream(peer).await?;
        let http = HttpSession::new_with_options(stream, peer);
        Ok((http, reused))
    }

    pub async fn reused_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        peer: &P,
    ) -> Option<HttpSession> {
        let stream = self.transport.reused_stream(peer).await?;
        let http = HttpSession::new_with_options(stream, peer);
        Some(http)
    }

    pub async fn release_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        mut session: HttpSession,
        peer: &P,
        idle_timeout: Option<Duration>,
    ) {
        session.respect_keepalive();
        if let Some(stream) = session.reuse().await {
            self.transport
                .release_stream(stream, peer.reuse_hash(), idle_timeout);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::protocols::l4::socket::SocketAddr;
    use crate::upstreams::peer::HttpPeer;
    use crate::upstreams::peer::Peer;
    use pingora_http::RequestHeader;
    use std::fmt::{Display, Formatter, Result as FmtResult};

    async fn get_http(http: &mut HttpSession, expected_status: u16) {
        let mut req = Box::new(RequestHeader::build("GET", b"/", None).unwrap());
        req.append_header("Host", "one.one.one.one").unwrap();
        http.write_request_header(req).await.unwrap();
        http.read_response().await.unwrap();
        http.respect_keepalive();

        assert_eq!(http.get_status().unwrap(), expected_status);
        while http.read_body_bytes().await.unwrap().is_some() {}
    }

    #[tokio::test]
    async fn test_connect() {
        let connector = Connector::new(None);
        let peer = HttpPeer::new(("1.1.1.1", 80), false, "".into());
        // make a new connection to 1.1.1.1
        let (http, reused) = connector.get_http_session(&peer).await.unwrap();
        let server_addr = http.server_addr().unwrap();
        assert_eq!(*server_addr, "1.1.1.1:80".parse::<SocketAddr>().unwrap());
        assert!(!reused);

        // this http is not even used, so not be able to reuse
        connector.release_http_session(http, &peer, None).await;
        let (mut http, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);

        get_http(&mut http, 301).await;
        connector.release_http_session(http, &peer, None).await;
        let (_, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(reused);
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_reuse_rejects_fd_mismatch() {
        use std::os::unix::prelude::AsRawFd;

        #[derive(Clone)]
        struct MismatchPeer {
            reuse_hash: u64,
            address: SocketAddr,
        }

        impl Display for MismatchPeer {
            fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
                write!(f, "{:?}", self.address)
            }
        }

        impl Peer for MismatchPeer {
            fn address(&self) -> &SocketAddr {
                &self.address
            }

            fn tls(&self) -> bool {
                false
            }

            fn sni(&self) -> &str {
                ""
            }

            fn reuse_hash(&self) -> u64 {
                self.reuse_hash
            }

            fn matches_fd<V: AsRawFd>(&self, _fd: V) -> bool {
                false
            }
        }

        let connector = Connector::new(None);
        let peer = HttpPeer::new(("1.1.1.1", 80), false, "".into());
        let (mut http, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);
        get_http(&mut http, 301).await;
        connector.release_http_session(http, &peer, None).await;

        let mismatch_peer = MismatchPeer {
            reuse_hash: peer.reuse_hash(),
            address: peer.address().clone(),
        };

        assert!(connector
            .reused_http_session(&mismatch_peer)
            .await
            .is_none());
    }

    #[tokio::test]
    #[cfg(feature = "any_tls")]
    async fn test_connect_tls() {
        let connector = Connector::new(None);
        let peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        // make a new connection to https://1.1.1.1
        let (http, reused) = connector.get_http_session(&peer).await.unwrap();
        let server_addr = http.server_addr().unwrap();
        assert_eq!(*server_addr, "1.1.1.1:443".parse::<SocketAddr>().unwrap());
        assert!(!reused);

        // this http is not even used, so not be able to reuse
        connector.release_http_session(http, &peer, None).await;
        let (mut http, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(!reused);

        get_http(&mut http, 200).await;
        connector.release_http_session(http, &peer, None).await;
        let (_, reused) = connector.get_http_session(&peer).await.unwrap();
        assert!(reused);
    }
}


================================================
FILE: pingora-core/src/connectors/http/v2.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::HttpSession;
use crate::connectors::{ConnectorOptions, TransportConnector};
use crate::protocols::http::custom::client::Session;
use crate::protocols::http::v1::client::HttpSession as Http1Session;
use crate::protocols::http::v2::client::{drive_connection, Http2Session};
use crate::protocols::{Digest, Stream, UniqueIDType};
use crate::upstreams::peer::{Peer, ALPN};

use bytes::Bytes;
use h2::client::SendRequest;
use log::debug;
use parking_lot::{Mutex, RwLock};
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use pingora_pool::{ConnectionMeta, ConnectionPool, PoolNode};
use std::collections::HashMap;
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::watch;

struct Stub(SendRequest<Bytes>);

impl Stub {
    async fn new_stream(&self) -> Result<SendRequest<Bytes>> {
        let send_req = self.0.clone();
        send_req
            .ready()
            .await
            .or_err(H2Error, "while creating new stream")
    }
}

pub(crate) struct ConnectionRefInner {
    connection_stub: Stub,
    closed: watch::Receiver<bool>,
    ping_timeout_occurred: Arc<AtomicBool>,
    id: UniqueIDType,
    // max concurrent streams this connection is allowed to create
    max_streams: usize,
    // how many concurrent streams already active
    current_streams: AtomicUsize,
    // The connection is gracefully shutting down, no more stream is allowed
    shutting_down: AtomicBool,
    // because `SendRequest` doesn't actually have access to the underlying Stream,
    // we log info about timing and tcp info here.
    pub(crate) digest: Digest,
    // To serialize certain operations when trying to release the connect back to the pool,
    pub(crate) release_lock: Arc<Mutex<()>>,
}

#[derive(Clone)]
pub struct ConnectionRef(Arc<ConnectionRefInner>);

impl ConnectionRef {
    pub fn new(
        send_req: SendRequest<Bytes>,
        closed: watch::Receiver<bool>,
        ping_timeout_occurred: Arc<AtomicBool>,
        id: UniqueIDType,
        max_streams: usize,
        digest: Digest,
    ) -> Self {
        ConnectionRef(Arc::new(ConnectionRefInner {
            connection_stub: Stub(send_req),
            closed,
            ping_timeout_occurred,
            id,
            max_streams,
            current_streams: AtomicUsize::new(0),
            shutting_down: false.into(),
            digest,
            release_lock: Arc::new(Mutex::new(())),
        }))
    }

    pub fn more_streams_allowed(&self) -> bool {
        let current = self.0.current_streams.load(Ordering::Relaxed);
        !self.is_shutting_down()
            && self.0.max_streams > current
            && self.0.connection_stub.0.current_max_send_streams() > current
    }

    pub fn is_idle(&self) -> bool {
        self.0.current_streams.load(Ordering::Relaxed) == 0
    }

    pub fn release_stream(&self) {
        self.0.current_streams.fetch_sub(1, Ordering::SeqCst);
    }

    pub fn id(&self) -> UniqueIDType {
        self.0.id
    }

    pub fn digest(&self) -> &Digest {
        &self.0.digest
    }

    pub fn digest_mut(&mut self) -> Option<&mut Digest> {
        Arc::get_mut(&mut self.0).map(|inner| &mut inner.digest)
    }

    pub fn ping_timedout(&self) -> bool {
        self.0.ping_timeout_occurred.load(Ordering::Relaxed)
    }

    pub fn is_closed(&self) -> bool {
        *self.0.closed.borrow()
    }

    // different from is_closed, existing streams can still be processed but can no longer create
    // new stream.
    pub fn is_shutting_down(&self) -> bool {
        self.0.shutting_down.load(Ordering::Relaxed)
    }

    // spawn a stream if more stream is allowed, otherwise return Ok(None)
    pub async fn spawn_stream(&self) -> Result<Option<Http2Session>> {
        // Atomically check if the current_stream is over the limit
        // load(), compare and then fetch_add() cannot guarantee the same
        let current_streams = self.0.current_streams.fetch_add(1, Ordering::SeqCst);
        if current_streams >= self.0.max_streams {
            // already over the limit, reset the counter to the previous value
            self.0.current_streams.fetch_sub(1, Ordering::SeqCst);
            return Ok(None);
        }

        match self.0.connection_stub.new_stream().await {
            Ok(send_req) => Ok(Some(Http2Session::new(send_req, self.clone()))),
            Err(e) => {
                // fail to create the stream, reset the counter
                self.0.current_streams.fetch_sub(1, Ordering::SeqCst);
                // Remote sends GOAWAY(NO_ERROR): graceful shutdown: this connection no longer
                // accepts new streams. We can still try to create new connection.
                if e.root_cause()
                    .downcast_ref::<h2::Error>()
                    .map(|e| {
                        e.is_go_away() && e.is_remote() && e.reason() == Some(h2::Reason::NO_ERROR)
                    })
                    .unwrap_or(false)
                {
                    self.0.shutting_down.store(true, Ordering::Relaxed);
                    Ok(None)
                } else {
                    Err(e)
                }
            }
        }
    }
}

pub struct InUsePool {
    // TODO: use pingora hashmap to shard the lock contention
    pools: RwLock<HashMap<u64, PoolNode<ConnectionRef>>>,
}

impl InUsePool {
    fn new() -> Self {
        InUsePool {
            pools: RwLock::new(HashMap::new()),
        }
    }

    /// Attempt to remove an empty [`PoolNode`] entry from the pools `HashMap`.
    ///
    /// Same rationale as [`ConnectionPool::try_remove_empty_node`]: prevents
    /// unbounded growth when many unique reuse hashes are seen over time.
    /// The write lock + re-check ensures we never remove a node that was
    /// concurrently repopulated.
    fn try_remove_empty_node(&self, reuse_hash: u64) {
        let mut pools = self.pools.write();
        if let Some(pool) = pools.get(&reuse_hash) {
            if pool.is_empty() {
                pools.remove(&reuse_hash);
            }
        }
    }

    pub fn insert(&self, reuse_hash: u64, conn: ConnectionRef) {
        {
            let pools = self.pools.read();
            if let Some(pool) = pools.get(&reuse_hash) {
                pool.insert(conn.id(), conn);
                return;
            }
        } // drop read lock

        let mut pools = self.pools.write();
        // Double-check: another thread may have inserted a node between
        // dropping the read lock and acquiring this write lock.
        if let Some(pool) = pools.get(&reuse_hash) {
            pool.insert(conn.id(), conn);
            return;
        }
        let pool = PoolNode::new();
        pool.insert(conn.id(), conn);
        pools.insert(reuse_hash, pool);
    }

    // retrieve a h2 conn ref to create a new stream
    // the caller should return the conn ref to this pool if there are still
    // capacity left for more streams
    pub fn get(&self, reuse_hash: u64) -> Option<ConnectionRef> {
        let (result, maybe_empty) = {
            let pools = self.pools.read();
            match pools.get(&reuse_hash) {
                Some(pool) => match pool.get_any() {
                    Some((_, conn)) => (Some(conn), pool.is_empty()),
                    None => (None, true),
                },
                None => (None, false),
            }
        }; // read lock released here

        if maybe_empty {
            self.try_remove_empty_node(reuse_hash);
        }

        result
    }

    // release a h2_stream, this functional will cause an ConnectionRef to be returned (if exist)
    // the caller should update the ref and then decide where to put it (in use pool or idle)
    pub fn release(&self, reuse_hash: u64, id: UniqueIDType) -> Option<ConnectionRef> {
        let (result, maybe_empty) = {
            let pools = self.pools.read();
            if let Some(pool) = pools.get(&reuse_hash) {
                let removed = pool.remove(id);
                (removed, pool.is_empty())
            } else {
                (None, false)
            }
        }; // read lock released here

        if maybe_empty {
            self.try_remove_empty_node(reuse_hash);
        }

        result
    }
}

const DEFAULT_POOL_SIZE: usize = 128;

/// Http2 connector
pub struct Connector {
    // just for creating connections, the Stream of h2 should be reused
    transport: TransportConnector,
    // the h2 connection idle pool
    idle_pool: Arc<ConnectionPool<ConnectionRef>>,
    // the pool of h2 connections that have ongoing streams
    in_use_pool: InUsePool,
}

impl Connector {
    /// Create a new [Connector] from the given [ConnectorOptions]
    pub fn new(options: Option<ConnectorOptions>) -> Self {
        let pool_size = options
            .as_ref()
            .map_or(DEFAULT_POOL_SIZE, |o| o.keepalive_pool_size);
        // connection offload is handled by the [TransportConnector]
        Connector {
            transport: TransportConnector::new(options),
            idle_pool: Arc::new(ConnectionPool::new(pool_size)),
            in_use_pool: InUsePool::new(),
        }
    }

    pub fn transport(&self) -> &TransportConnector {
        &self.transport
    }

    pub fn idle_pool(&self) -> &Arc<ConnectionPool<ConnectionRef>> {
        &self.idle_pool
    }

    pub fn in_use_pool(&self) -> &InUsePool {
        &self.in_use_pool
    }

    /// Create a new Http2 connection to the given server
    ///
    /// Either an Http2 or Http1 session can be returned depending on the server's preference.
    pub async fn new_http_session<P: Peer + Send + Sync + 'static, C: Session>(
        &self,
        peer: &P,
    ) -> Result<HttpSession<C>> {
        let stream = self.transport.new_stream(peer).await?;

        // check alpn
        match stream.selected_alpn_proto() {
            Some(ALPN::H2) => { /* continue */ }
            Some(_) => {
                // H2 not supported
                return Ok(HttpSession::H1(Http1Session::new_with_options(
                    stream, peer,
                )));
            }
            None => {
                // if tls but no ALPN, default to h1
                // else if plaintext and min http version is 1, this is most likely h1
                if peer.tls()
                    || peer
                        .get_peer_options()
                        .is_none_or(|o| o.alpn.get_min_http_version() == 1)
                {
                    return Ok(HttpSession::H1(Http1Session::new_with_options(
                        stream, peer,
                    )));
                }
                // else: min http version=H2 over plaintext, there is no ALPN anyways, we trust
                // the caller that the server speaks h2c
            }
        }
        let max_h2_stream = peer.get_peer_options().map_or(1, |o| o.max_h2_streams);
        let conn = handshake(stream, max_h2_stream, peer.h2_ping_interval()).await?;
        let h2_stream = conn
            .spawn_stream()
            .await?
            .expect("newly created connections should have at least one free stream");
        if conn.more_streams_allowed() {
            self.in_use_pool.insert(peer.reuse_hash(), conn);
        }
        Ok(HttpSession::H2(h2_stream))
    }

    /// Try to create a new http2 stream from any existing H2 connection.
    ///
    /// None means there is no "free" connection left.
    pub async fn reused_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        peer: &P,
    ) -> Result<Option<Http2Session>> {
        // check in use pool first so that we use fewer total connections
        // then idle pool
        let reuse_hash = peer.reuse_hash();

        // NOTE: We grab a conn from the pools, create a new stream and put the conn back if the
        // conn has more free streams. During this process another caller could arrive but is not
        // able to find the conn even the conn has free stream to use.
        // We accept this false negative to keep the implementation simple. This false negative
        // makes an actual impact when there are only a few connection.
        // Alternative design 1. given each free stream a conn object: a lot of Arc<>
        // Alternative design 2. mutex the pool, which creates lock contention when concurrency is high
        // Alternative design 3. do not pop conn from the pool so that multiple callers can grab it
        // which will cause issue where spawn_stream() could return None because others call it
        // first. Thus a caller might have to retry or give up. This issue is more likely to happen
        // when concurrency is high.
        let maybe_conn = self
            .in_use_pool
            .get(reuse_hash)
            // filter out closed, InUsePool does not have notify closed eviction like the idle pool
            // and it's possible we get an in use connection that is closed and not yet released
            .filter(|c| !c.is_closed())
            .or_else(|| self.idle_pool.get(&reuse_hash));
        if let Some(conn) = maybe_conn {
            #[cfg(unix)]
            if !peer.matches_fd(conn.id()) {
                return Ok(None);
            }
            #[cfg(windows)]
            {
                use std::os::windows::io::{AsRawSocket, RawSocket};
                struct WrappedRawSocket(RawSocket);
                impl AsRawSocket for WrappedRawSocket {
                    fn as_raw_socket(&self) -> RawSocket {
                        self.0
                    }
                }
                if !peer.matches_sock(WrappedRawSocket(conn.id() as RawSocket)) {
                    return Ok(None);
                }
            }
            let h2_stream = conn.spawn_stream().await?;
            if conn.more_streams_allowed() {
                self.in_use_pool.insert(reuse_hash, conn);
            }
            Ok(h2_stream)
        } else {
            Ok(None)
        }
    }

    /// Release a finished h2 stream.
    ///
    /// This function will terminate the [Http2Session]. The corresponding h2 connection will now
    /// have one more free stream to use.
    ///
    /// The h2 connection will be closed after `idle_timeout` if it has no active streams.
    pub fn release_http_session<P: Peer + Send + Sync + 'static>(
        &self,
        session: Http2Session,
        peer: &P,
        idle_timeout: Option<Duration>,
    ) {
        let id = session.conn.id();
        let reuse_hash = peer.reuse_hash();
        // get a ref to the connection, which we might need below, before dropping the h2
        let conn = session.conn();

        // The lock here is to make sure that in_use_pool.insert() below cannot be called after
        // in_use_pool.release(), which would have put the conn entry in both pools.
        // It also makes sure that only one conn will trigger the conn.is_idle() condition, which
        // avoids putting the same conn into the idle_pool more than once.
        let locked = conn.0.release_lock.lock_arc();
        // this drop() will both drop the actual stream and call the conn.release_stream()
        drop(session);
        // find and remove the conn stored in in_use_pool so that it could be put in the idle pool
        // if necessary
        let conn = self.in_use_pool.release(reuse_hash, id).unwrap_or(conn);
        if conn.is_closed() || conn.is_shutting_down() {
            // should never be put back to the pool
            return;
        }
        if conn.is_idle() {
            drop(locked);
            let meta = ConnectionMeta {
                key: reuse_hash,
                id,
            };
            let closed = conn.0.closed.clone();
            let (notify_evicted, watch_use) = self.idle_pool.put(&meta, conn);
            let pool = self.idle_pool.clone(); //clone the arc
            let rt = pingora_runtime::current_handle();
            rt.spawn(async move {
                pool.idle_timeout(&meta, idle_timeout, notify_evicted, closed, watch_use)
                    .await;
            });
        } else {
            self.in_use_pool.insert(reuse_hash, conn);
            drop(locked);
        }
    }

    /// Tell the connector to always send h1 for ALPN for the given peer in the future.
    pub fn prefer_h1(&self, peer: &impl Peer) {
        self.transport.prefer_h1(peer);
    }

    pub(crate) fn h1_is_preferred(&self, peer: &impl Peer) -> bool {
        self.transport
            .preferred_http_version
            .get(peer)
            .is_some_and(|v| matches!(v, ALPN::H1))
    }
}

// The h2 library we use has unbounded internal buffering, which will cause excessive memory
// consumption when the downstream is slower than upstream. This window size caps the buffering by
// limiting how much data can be inflight. However, setting this value will also cap the max
// download speed by limiting the bandwidth-delay product of a link.
// Long term, we should advertising large window but shrink it when a small buffer is full.
// 8 Mbytes = 80 Mbytes X 100ms, which should be enough for most links.
const H2_WINDOW_SIZE: u32 = 1 << 23;

pub async fn handshake(
    stream: Stream,
    max_streams: usize,
    h2_ping_interval: Option<Duration>,
) -> Result<ConnectionRef> {
    use h2::client::Builder;
    use pingora_runtime::current_handle;

    // Safe guard: new_http_session() assumes there should be at least one free stream
    if max_streams == 0 {
        return Error::e_explain(H2Error, "zero max_stream configured");
    }

    let id = stream.id();
    let digest = Digest {
        // NOTE: this field is always false because the digest is shared across all streams
        // The streams should log their own reuse info
        ssl_digest: stream.get_ssl_digest(),
        // TODO: log h2 handshake time
        timing_digest: stream.get_timing_digest(),
        proxy_digest: stream.get_proxy_digest(),
        socket_digest: stream.get_socket_digest(),
    };
    // TODO: make these configurable
    let (send_req, connection) = Builder::new()
        .enable_push(false)
        .initial_max_send_streams(max_streams)
        // The limit for the server. Server push is not allowed, so this value doesn't matter
        .max_concurrent_streams(1)
        .max_frame_size(64 * 1024) // advise server to send larger frames
        .initial_window_size(H2_WINDOW_SIZE)
        // should this be max_streams * H2_WINDOW_SIZE?
        .initial_connection_window_size(H2_WINDOW_SIZE)
        .handshake(stream)
        .await
        .or_err(HandshakeError, "during H2 handshake")?;
    debug!("H2 handshake to server done.");
    let ping_timeout_occurred = Arc::new(AtomicBool::new(false));
    let ping_timeout_clone = ping_timeout_occurred.clone();
    let max_allowed_streams = std::cmp::min(max_streams, connection.max_concurrent_send_streams());

    // Safe guard: new_http_session() assumes there should be at least one free stream
    // The server won't commonly advertise 0 max stream.
    if max_allowed_streams == 0 {
        return Error::e_explain(H2Error, "zero max_concurrent_send_streams received");
    }

    let (closed_tx, closed_rx) = watch::channel(false);

    current_handle().spawn(async move {
        drive_connection(
            connection,
            id,
            closed_tx,
            h2_ping_interval,
            ping_timeout_clone,
        )
        .await;
    });
    Ok(ConnectionRef::new(
        send_req,
        closed_rx,
        ping_timeout_occurred,
        id,
        max_allowed_streams,
        digest,
    ))
}

// TODO(slava): add custom unit tests
#[cfg(test)]
mod tests {
    use super::*;
    use crate::upstreams::peer::HttpPeer;

    #[tokio::test]
    #[cfg(feature = "any_tls")]
    async fn test_connect_h2() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(2, 2);
        let h2 = connector
            .new_http_session::<HttpPeer, ()>(&peer)
            .await
            .unwrap();
        match h2 {
            HttpSession::H1(_) => panic!("expect h2"),
            HttpSession::H2(h2_stream) => assert!(!h2_stream.ping_timedout()),
            HttpSession::Custom(_) => panic!("expect h2"),
        }
    }

    #[tokio::test]
    #[cfg(feature = "any_tls")]
    async fn test_connect_h1() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        // a hack to force h1, new_http_session() in the future might validate this setting
        peer.options.set_http_version(1, 1);
        let h2 = connector
            .new_http_session::<HttpPeer, ()>(&peer)
            .await
            .unwrap();
        match h2 {
            HttpSession::H1(_) => {}
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
    }

    #[tokio::test]
    async fn test_connect_h1_plaintext() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 80), false, "".into());
        peer.options.set_http_version(2, 1);
        let h2 = connector
            .new_http_session::<HttpPeer, ()>(&peer)
            .await
            .unwrap();
        match h2 {
            HttpSession::H1(_) => {}
            HttpSession::H2(_) => panic!("expect h1"),
            HttpSession::Custom(_) => panic!("expect h1"),
        }
    }

    #[tokio::test]
    #[cfg(feature = "any_tls")]
    async fn test_h2_single_stream() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(2, 2);
        peer.options.max_h2_streams = 1;
        let h2 = connector
            .new_http_session::<HttpPeer, ()>(&peer)
            .await
            .unwrap();
        let h2_1 = match h2 {
            HttpSession::H1(_) => panic!("expect h2"),
            HttpSession::H2(h2_stream) => h2_stream,
            HttpSession::Custom(_) => panic!("expect h2"),
        };

        let id = h2_1.conn.id();

        assert!(connector
            .reused_http_session(&peer)
            .await
            .unwrap()
            .is_none());

        connector.release_http_session(h2_1, &peer, None);

        let h2_2 = connector.reused_http_session(&peer).await.unwrap().unwrap();
        assert_eq!(id, h2_2.conn.id());

        connector.release_http_session(h2_2, &peer, None);

        let h2_3 = connector.reused_http_session(&peer).await.unwrap().unwrap();
        assert_eq!(id, h2_3.conn.id());
    }

    #[tokio::test]
    #[cfg(feature = "any_tls")]
    async fn test_h2_multiple_stream() {
        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(2, 2);
        peer.options.max_h2_streams = 3;
        let h2 = connector
            .new_http_session::<HttpPeer, ()>(&peer)
            .await
            .unwrap();
        let h2_1 = match h2 {
            HttpSession::H1(_) => panic!("expect h2"),
            HttpSession::H2(h2_stream) => h2_stream,
            HttpSession::Custom(_) => panic!("expect h2"),
        };

        let id = h2_1.conn.id();

        let h2_2 = connector.reused_http_session(&peer).await.unwrap().unwrap();
        assert_eq!(id, h2_2.conn.id());
        let h2_3 = connector.reused_http_session(&peer).await.unwrap().unwrap();
        assert_eq!(id, h2_3.conn.id());

        // max stream is 3 for now
        assert!(connector
            .reused_http_session(&peer)
            .await
            .unwrap()
            .is_none());

        connector.release_http_session(h2_1, &peer, None);

        let h2_4 = connector.reused_http_session(&peer).await.unwrap().unwrap();
        assert_eq!(id, h2_4.conn.id());

        connector.release_http_session(h2_2, &peer, None);
        connector.release_http_session(h2_3, &peer, None);
        connector.release_http_session(h2_4, &peer, None);

        // all streams are released, now the connection is idle
        let h2_5 = connector.reused_http_session(&peer).await.unwrap().unwrap();
        assert_eq!(id, h2_5.conn.id());
    }

    #[cfg(all(feature = "any_tls", unix))]
    #[tokio::test]
    async fn test_h2_reuse_rejects_fd_mismatch() {
        use crate::protocols::l4::socket::SocketAddr;
        use crate::upstreams::peer::Peer;
        use std::fmt::{Display, Formatter, Result as FmtResult};
        use std::os::unix::prelude::AsRawFd;

        #[derive(Clone)]
        struct MismatchPeer {
            reuse_hash: u64,
            address: SocketAddr,
        }

        impl Display for MismatchPeer {
            fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
                write!(f, "{:?}", self.address)
            }
        }

        impl Peer for MismatchPeer {
            fn address(&self) -> &SocketAddr {
                &self.address
            }

            fn tls(&self) -> bool {
                true
            }

            fn sni(&self) -> &str {
                ""
            }

            fn reuse_hash(&self) -> u64 {
                self.reuse_hash
            }

            fn matches_fd<V: AsRawFd>(&self, _fd: V) -> bool {
                false
            }
        }

        let connector = Connector::new(None);
        let mut peer = HttpPeer::new(("1.1.1.1", 443), true, "one.one.one.one".into());
        peer.options.set_http_version(2, 2);
        peer.options.max_h2_streams = 1;

        let h2 = connector
            .new_http_session::<HttpPeer, ()>(&peer)
            .await
            .unwrap();
        let h2_stream = match h2 {
            HttpSession::H1(_) => panic!("expect h2"),
            HttpSession::H2(h2_stream) => h2_stream,
            HttpSession::Custom(_) => panic!("expect h2"),
        };

        connector.release_http_session(h2_stream, &peer, None);

        let mismatch_peer = MismatchPeer {
            reuse_hash: peer.reuse_hash(),
            address: peer.address().clone(),
        };

        assert!(connector
            .reused_http_session(&mismatch_peer)
            .await
            .unwrap()
            .is_none());
    }
}


================================================
FILE: pingora-core/src/connectors/l4.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(unix)]
use crate::protocols::l4::ext::connect_uds;
use crate::protocols::l4::ext::{
    connect_with as tcp_connect, set_dscp, set_recv_buf, set_tcp_fastopen_connect,
};
use crate::protocols::l4::socket::SocketAddr;
use crate::protocols::l4::stream::Stream;
use crate::protocols::{GetSocketDigest, SocketDigest};
use crate::upstreams::peer::Peer;
use async_trait::async_trait;
use log::debug;
use pingora_error::{Context, Error, ErrorType::*, OrErr, Result};
use rand::seq::SliceRandom;
use std::net::SocketAddr as InetSocketAddr;
#[cfg(unix)]
use std::os::unix::io::AsRawFd;
#[cfg(windows)]
use std::os::windows::io::AsRawSocket;

/// The interface to establish a L4 connection
#[async_trait]
pub trait Connect: std::fmt::Debug {
    async fn connect(&self, addr: &SocketAddr) -> Result<Stream>;
}

/// Settings for binding on connect
#[derive(Clone, Debug, Default)]
pub struct BindTo {
    // local ip address
    pub addr: Option<InetSocketAddr>,
    // port range
    port_range: Option<(u16, u16)>,
    // whether we fallback and try again on bind errors when a port range is set
    fallback: bool,
}

impl BindTo {
    /// Sets the port range we will bind to where the first item in the tuple is the lower bound
    /// and the second item is the upper bound.
    ///
    /// Note this bind option is only supported on Linux since 6.3, this is a no-op on other systems.
    /// To reset the range, pass a `None` or `Some((0,0))`, more information can be found [here](https://man7.org/linux/man-pages/man7/ip.7.html)
    pub fn set_port_range(&mut self, range: Option<(u16, u16)>) -> Result<()> {
        if range.is_none() && self.port_range.is_none() {
            // nothing to do
            return Ok(());
        }

        match range {
            // 0,0 is valid for resets
            None | Some((0, 0)) => self.port_range = Some((0, 0)),
            // set the port range if valid
            Some((low, high)) if low > 0 && low < high => {
                self.port_range = Some((low, high));
            }
            _ => return Error::e_explain(SocketError, "invalid port range: {range}"),
        }
        Ok(())
    }

    /// Set whether we fallback on no address available if a port range is set
    pub fn set_fallback(&mut self, fallback: bool) {
        self.fallback = fallback
    }

    /// Configured bind port range
    pub fn port_range(&self) -> Option<(u16, u16)> {
        self.port_range
    }

    /// Whether we attempt to fallback on no address available
    pub fn will_fallback(&self) -> bool {
        self.fallback && self.port_range.is_some()
    }
}

/// Establish a connection (l4) to the given peer using its settings and an optional bind address.
pub(crate) async fn connect<P>(peer: &P, bind_to: Option<BindTo>) -> Result<Stream>
where
    P: Peer + Send + Sync,
{
    if peer.get_proxy().is_some() {
        return proxy_connect(peer)
            .await
            .err_context(|| format!("Fail to establish CONNECT proxy: {}", peer));
    }
    let peer_addr = peer.address();
    let mut stream: Stream =
        if let Some(custom_l4) = peer.get_peer_options().and_then(|o| o.custom_l4.as_ref()) {
            custom_l4.connect(peer_addr).await?
        } else {
            match peer_addr {
                SocketAddr::Inet(addr) => {
                    let connect_future = tcp_connect(addr, bind_to.as_ref(), |socket| {
                        #[cfg(unix)]
                        let raw = socket.as_raw_fd();
                        #[cfg(windows)]
                        let raw = socket.as_raw_socket();

                        if peer.tcp_fast_open() {
                            set_tcp_fastopen_connect(raw)?;
                        }
                        if let Some(recv_buf) = peer.tcp_recv_buf() {
                            debug!("Setting recv buf size");
                            set_recv_buf(raw, recv_buf)?;
                        }
                        if let Some(dscp) = peer.dscp() {
                            debug!("Setting dscp");
                            set_dscp(raw, dscp)?;
                        }

                        if let Some(tweak_hook) = peer
                            .get_peer_options()
                            .and_then(|o| o.upstream_tcp_sock_tweak_hook.clone())
                        {
                            tweak_hook(socket)?;
                        }

                        Ok(())
                    });
                    let conn_res = match peer.connection_timeout() {
                        Some(t) => pingora_timeout::timeout(t, connect_future)
                            .await
                            .explain_err(ConnectTimedout, |_| {
                                format!("timeout {t:?} connecting to server {peer}")
                            })?,
                        None => connect_future.await,
                    };
                    match conn_res {
                        Ok(socket) => {
                            debug!("connected to new server: {}", peer.address());
                            Ok(socket.into())
                        }
                        Err(e) => {
                            let c = format!("Fail to connect to {peer}");
                            match e.etype() {
                                SocketError | BindError => Error::e_because(InternalError, c, e),
                                _ => Err(e.more_context(c)),
                            }
                        }
                    }
                }
                #[cfg(unix)]
                SocketAddr::Unix(addr) => {
                    let connect_future = connect_uds(
                        addr.as_pathname()
                            .expect("non-pathname unix sockets not supported as peer"),
                    );
                    let conn_res = match peer.connection_timeout() {
                        Some(t) => pingora_timeout::timeout(t, connect_future)
                            .await
                            .explain_err(ConnectTimedout, |_| {
                                format!("timeout {t:?} connecting to server {peer}")
                            })?,
                        None => connect_future.await,
                    };
                    match conn_res {
                        Ok(socket) => {
                            debug!("connected to new server: {}", peer.address());
                            Ok(socket.into())
                        }
                        Err(e) => {
                            let c = format!("Fail to connect to {peer}");
                            match e.etype() {
                                SocketError | BindError => Error::e_because(InternalError, c, e),
                                _ => Err(e.more_context(c)),
                            }
                        }
                    }
                }
            }?
        };

    let tracer = peer.get_tracer();
    if let Some(t) = tracer {
        t.0.on_connected();
        stream.tracer = Some(t);
    }

    // settings applied based on stream type
    if let Some(ka) = peer.tcp_keepalive() {
        stream.set_keepalive(ka)?;
    }
    stream.set_nodelay()?;

    #[cfg(unix)]
    let digest = SocketDigest::from_raw_fd(stream.as_raw_fd());
    #[cfg(windows)]
    let digest = SocketDigest::from_raw_socket(stream.as_raw_socket());
    digest
        .peer_addr
        .set(Some(peer_addr.clone()))
        .expect("newly created OnceCell must be empty");
    stream.set_socket_digest(digest);

    Ok(stream)
}

pub(crate) fn bind_to_random<P: Peer>(
    peer: &P,
    v4_list: &[InetSocketAddr],
    v6_list: &[InetSocketAddr],
) -> Option<BindTo> {
    // helper function for randomly picking address
    fn bind_to_ips(ips: &[InetSocketAddr]) -> Option<InetSocketAddr> {
        match ips.len() {
            0 => None,
            1 => Some(ips[0]),
            _ => {
                // pick a random bind ip
                ips.choose(&mut rand::thread_rng()).copied()
            }
        }
    }

    let mut bind_to = peer.get_peer_options().and_then(|o| o.bind_to.clone());
    if bind_to.as_ref().map(|b| b.addr).is_some() {
        // already have a bind address selected
        return bind_to;
    }

    let addr = match peer.address() {
        SocketAddr::Inet(sockaddr) => match sockaddr {
            InetSocketAddr::V4(_) => bind_to_ips(v4_list),
            InetSocketAddr::V6(_) => bind_to_ips(v6_list),
        },
        #[cfg(unix)]
        SocketAddr::Unix(_) => None,
    };

    if addr.is_some() {
        if let Some(bind_to) = bind_to.as_mut() {
            bind_to.addr = addr;
        } else {
            bind_to = Some(BindTo {
                addr,
                ..Default::default()
            });
        }
    }
    bind_to
}

use crate::protocols::raw_connect;

#[cfg(unix)]
async fn proxy_connect<P: Peer>(peer: &P) -> Result<Stream> {
    // safe to unwrap
    let proxy = peer.get_proxy().unwrap();
    let options = peer.get_peer_options().unwrap();

    // combine required and optional headers
    let mut headers = proxy
        .headers
        .iter()
        .chain(options.extra_proxy_headers.iter());

    // not likely to timeout during connect() to UDS
    let stream: Box<Stream> = Box::new(
        connect_uds(&proxy.next_hop)
            .await
            .or_err_with(ConnectError, || {
                format!("CONNECT proxy connect() error to {:?}", &proxy.next_hop)
            })?
            .into(),
    );

    let req_header = raw_connect::generate_connect_header(&proxy.host, proxy.port, &mut headers)?;
    let fut = raw_connect::connect(stream, &req_header, peer);
    let (mut stream, digest) = match peer.connection_timeout() {
        Some(t) => pingora_timeout::timeout(t, fut)
            .await
            .explain_err(ConnectTimedout, |_| "establishing CONNECT proxy")?,
        None => fut.await,
    }
    .map_err(|mut e| {
        // http protocol may ask to retry if reused client
        e.retry.decide_reuse(false);
        e
    })?;
    debug!("CONNECT proxy established: {:?}", proxy);
    stream.set_proxy_digest(digest);
    let stream = stream.into_any().downcast::<Stream>().unwrap(); // safe, it is Stream from above
    Ok(*stream)
}

#[cfg(windows)]
async fn proxy_connect<P: Peer>(peer: &P) -> Result<Stream> {
    panic!("peer proxy not supported on windows")
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::upstreams::peer::{BasicPeer, HttpPeer, Proxy};
    use pingora_error::ErrorType;
    use std::collections::BTreeMap;
    use std::path::PathBuf;
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Arc;
    use std::time::{Duration, Instant};
    use tokio::io::AsyncWriteExt;
    use tokio::time::sleep;

    /// Some of the tests below are flaky when making new connections to mock
    /// servers. The servers are simple tokio listeners, so failures there are
    /// not indicative of real errors. This function will retry the peer/server
    /// in increasing intervals until it either succeeds in connecting or a long
    /// timeout expires (max 10sec)
    #[cfg(unix)]
    async fn wait_for_peer<P>(peer: &P)
    where
        P: Peer + Send + Sync,
    {
        use ErrorType as E;
        let start = Instant::now();
        let mut res = connect(peer, None).await;
        let mut delay = Duration::from_millis(5);
        let max_delay = Duration::from_secs(10);

        while start.elapsed() < max_delay {
            match &res {
                Err(e) if e.etype == E::ConnectRefused => {}
                _ => break,
            }
            sleep(delay).await;
            delay *= 2;
            res = connect(peer, None).await;
        }
    }

    #[tokio::test]
    async fn test_conn_error_refused() {
        let peer = BasicPeer::new("127.0.0.1:79"); // hopefully port 79 is not used
        let new_session = connect(&peer, None).await;
        assert_eq!(new_session.unwrap_err().etype(), &ConnectRefused)
    }

    // TODO broken on arm64
    #[ignore]
    #[tokio::test]
    async fn test_conn_error_no_route() {
        let peer = BasicPeer::new("[::3]:79"); // no route
        let new_session = connect(&peer, None).await;
        assert_eq!(new_session.unwrap_err().etype(), &ConnectNoRoute)
    }

    #[tokio::test]
    async fn test_conn_error_addr_not_avail() {
        let peer = HttpPeer::new("127.0.0.1:121".to_string(), false, "".to_string());
        let addr = "192.0.2.2:0".parse().ok();
        let bind_to = BindTo {
            addr,
            ..Default::default()
        };
        let new_session = connect(&peer, Some(bind_to)).await;
        assert_eq!(new_session.unwrap_err().etype(), &InternalError)
    }

    #[tokio::test]
    async fn test_conn_error_other() {
        let peer = HttpPeer::new("240.0.0.1:80".to_string(), false, "".to_string()); // non localhost
        let addr = "127.0.0.1:0".parse().ok();
        // create an error: cannot send from src addr: localhost to dst addr: a public IP
        let bind_to = BindTo {
            addr,
            ..Default::default()
        };
        let new_session = connect(&peer, Some(bind_to)).await;
        let error = new_session.unwrap_err();
        // XXX: some system will allow the socket to bind and connect without error, only to timeout
        assert!(
            error.etype() == &ConnectError
                || error.etype() == &ConnectTimedout
                // The error seen on mac: https://github.com/cloudflare/pingora/pull/679
                || (error.etype() == &InternalError),
            "{error:?}"
        )
    }

    #[tokio::test]
    async fn test_conn_timeout() {
        // 192.0.2.1 is effectively a blackhole
        let mut peer = BasicPeer::new("192.0.2.1:79");
        peer.options.connection_timeout = Some(std::time::Duration::from_millis(1)); //1ms
        let new_session = connect(&peer, None).await;
        assert_eq!(new_session.unwrap_err().etype(), &ConnectTimedout)
    }

    #[tokio::test]
    async fn test_tweak_hook() {
        const INIT_FLAG: bool = false;

        let flag = Arc::new(AtomicBool::new(INIT_FLAG));

        let mut peer = BasicPeer::new("1.1.1.1:80");

        let move_flag = Arc::clone(&flag);

        peer.options.upstream_tcp_sock_tweak_hook = Some(Arc::new(move |_| {
            move_flag.fetch_xor(true, Ordering::SeqCst);
            Ok(())
        }));

        connect(&peer, None).await.unwrap();

        assert_eq!(!INIT_FLAG, flag.load(Ordering::SeqCst));
    }

    #[tokio::test]
    async fn test_custom_connect() {
        #[derive(Debug)]
        struct MyL4;
        #[async_trait]
        impl Connect for MyL4 {
            async fn connect(&self, _addr: &SocketAddr) -> Result<Stream> {
                tokio::net::TcpStream::connect("1.1.1.1:80")
                    .await
                    .map(|s| s.into())
                    .or_fail()
            }
        }
        // :79 shouldn't be able to be connected to
        let mut peer = BasicPeer::new("1.1.1.1:79");
        peer.options.custom_l4 = Some(std::sync::Arc::new(MyL4 {}));

        let new_session = connect(&peer, None).await;

        // but MyL4 connects to :80 instead
        assert!(new_session.is_ok());
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_connect_proxy_fail() {
        let mut peer = HttpPeer::new("1.1.1.1:80".to_string(), false, "".to_string());
        let mut path = PathBuf::new();
        path.push("/tmp/123");
        peer.proxy = Some(Proxy {
            next_hop: path.into(),
            host: "1.1.1.1".into(),
            port: 80,
            headers: BTreeMap::new(),
        });
        let new_session = connect(&peer, None).await;
        let e = new_session.unwrap_err();
        assert_eq!(e.etype(), &ConnectError);
        assert!(!e.retry());
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread")]
    async fn test_connect_proxy_work() {
        use crate::connectors::test_utils;

        let socket_path = test_utils::unique_uds_path("connect_proxy_work");
        let (ready_rx, shutdown_tx, server_handle) =
            test_utils::spawn_mock_uds_server(socket_path.clone(), b"HTTP/1.1 200 OK\r\n\r\n");

        // Wait for the server to be ready
        ready_rx.await.unwrap();

        let mut peer = HttpPeer::new("1.1.1.1:80".to_string(), false, "".to_string());
        let mut path = PathBuf::new();
        path.push(&socket_path);
        peer.proxy = Some(Proxy {
            next_hop: path.into(),
            host: "1.1.1.1".into(),
            port: 80,
            headers: BTreeMap::new(),
        });
        let new_session = connect(&peer, None).await;
        assert!(new_session.is_ok());

        // Clean up
        let _ = shutdown_tx.send(());
        server_handle.await.unwrap();
    }

    #[cfg(unix)]
    #[tokio::test(flavor = "multi_thread")]
    async fn test_connect_proxy_conn_closed() {
        use crate::connectors::test_utils;

        let socket_path = test_utils::unique_uds_path("connect_proxy_conn_closed");
        let (ready_rx, shutdown_tx, server_handle) =
            test_utils::spawn_mock_uds_server_close_immediate(socket_path.clone());

        // Wait for the server to be ready
        ready_rx.await.unwrap();

        let mut peer = HttpPeer::new("1.1.1.1:80".to_string(), false, "".to_string());
        let mut path = PathBuf::new();
        path.push(&socket_path);
        peer.proxy = Some(Proxy {
            next_hop: path.into(),
            host: "1.1.1.1".into(),
            port: 80,
            headers: BTreeMap::new(),
        });
        let new_session = connect(&peer, None).await;
        let err = new_session.unwrap_err();
        assert_eq!(err.etype(), &ConnectionClosed);
        assert!(!err.retry());

        // Clean up
        let _ = shutdown_tx.send(());
        server_handle.await.unwrap();
    }

    #[cfg(target_os = "linux")]
    #[tokio::test(flavor = "multi_thread")]
    async fn test_bind_to_port_range_on_connect() {
        fn get_ip_local_port_range() -> (u16, u16) {
            let path = "/proc/sys/net/ipv4/ip_local_port_range";
            let file = std::fs::read_to_string(path).unwrap();
            let mut parts = file.split_whitespace();
            (
                parts.next().unwrap().parse().unwrap(),
                parts.next().unwrap().parse().unwrap(),
            )
        }

        // one-off mock server
        async fn mock_inet_connect_server() -> u16 {
            use tokio::net::TcpListener;
            let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();

            let port = listener.local_addr().unwrap().port();

            tokio::spawn(async move {
                if let Ok((mut stream, _addr)) = listener.accept().await {
                    stream.write_all(b"HTTP/1.1 200 OK\r\n\r\n").await.unwrap();
                    // wait a bit so that the client can read
                    tokio::time::sleep(std::time::Duration::from_millis(100)).await;
                }
            });

            port
        }

        fn in_port_range(session: Stream, lower: u16, upper: u16) -> bool {
            let digest = session.get_socket_digest();
            let local_addr = digest
                .as_ref()
                .and_then(|s| s.local_addr())
                .unwrap()
                .as_inet()
                .unwrap();

            // assert range
            local_addr.port() >= lower && local_addr.port() <= upper
        }

        let port = mock_inet_connect_server().await;

        // need to read /proc/sys/net/ipv4/ip_local_port_range for this test to work
        // IP_LOCAL_PORT_RANGE clamp only works on ports in /proc/sys/net/ipv4/ip_local_port_range
        let (low, _) = get_ip_local_port_range();
        let high = low + 1;

        let peer = HttpPeer::new(format!("127.0.0.1:{port}"), false, "".to_string());
        let mut bind_to = BindTo {
            addr: "127.0.0.1:0".parse().ok(),
            ..Default::default()
        };

        // wait for the server to start
        wait_for_peer(&peer).await;

        bind_to.set_port_range(Some((low, high))).unwrap();

        let mut success_count = 0;
        let mut address_unavailable_count = 0;

        // Issue a bunch of requests at once and ensure that all successful
        // requests have ports in the right range and that there is at least
        // one address-unavailable error because we are restricting the number
        // of ports so heavily
        for _ in 0..10 {
            match connect(&peer, Some(bind_to.clone())).await {
                Ok(session) => {
                    assert!(in_port_range(session, low, high));
                    success_count += 1;
                }
                Err(e) if format!("{e:?}").contains("AddrNotAvailable") => {
                    address_unavailable_count += 1;
                }
                Err(e) => {
                    panic!("Unexpected error {e:?}")
                }
            }
        }

        assert!(address_unavailable_count > 0);
        assert!(success_count >= (high - low));

        // enable fallback, assert not in port range but successful
        bind_to.set_fallback(true);
        let session4 = connect(&peer, Some(bind_to.clone())).await.unwrap();
        assert!(!in_port_range(session4, low, high));

        // works without bind IP, shift up to use new ports
        let low = low + 2;
        let high = low + 1;
        let mut bind_to = BindTo::default();
        bind_to.set_port_range(Some((low, high))).unwrap();
        let session5 = connect(&peer, Some(bind_to.clone())).await.unwrap();
        assert!(in_port_range(session5, low, high));
    }

    #[test]
    fn test_bind_to_port_ranges() {
        let addr = "127.0.0.1:0".parse().ok();
        let mut bind_to = BindTo {
            addr,
            ..Default::default()
        };

        // None because the previous value was None
        bind_to.set_port_range(None).unwrap();
        assert!(bind_to.port_range.is_none());

        // zeroes are handled
        bind_to.set_port_range(Some((0, 0))).unwrap();
        assert_eq!(bind_to.port_range, Some((0, 0)));

        // zeroes because the previous value was Some
        bind_to.set_port_range(None).unwrap();
        assert_eq!(bind_to.port_range, Some((0, 0)));

        // low > high is error
        assert!(bind_to.set_port_range(Some((2000, 1000))).is_err());

        // low < high success
        bind_to.set_port_range(Some((1000, 2000))).unwrap();
        assert_eq!(bind_to.port_range, Some((1000, 2000)));
    }
}


================================================
FILE: pingora-core/src/connectors/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Connecting to servers

pub mod http;
pub mod l4;
mod offload;

#[cfg(feature = "any_tls")]
mod tls;

#[cfg(not(feature = "any_tls"))]
use crate::tls::connectors as tls;

use crate::protocols::Stream;
use crate::server::configuration::ServerConf;
use crate::upstreams::peer::{Peer, ALPN};

pub use l4::Connect as L4Connect;
use l4::{connect as l4_connect, BindTo};
use log::{debug, error, warn};
use offload::OffloadRuntime;
use parking_lot::RwLock;
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use pingora_pool::{ConnectionMeta, ConnectionPool};
use std::collections::HashMap;
use std::net::SocketAddr;
use std::sync::Arc;
use tls::TlsConnector;
use tokio::sync::Mutex;

/// The options to configure a [TransportConnector]
#[derive(Clone)]
pub struct ConnectorOptions {
    /// Path to the CA file used to validate server certs.
    ///
    /// If `None`, the CA in the [default](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_default_verify_paths.html)
    /// locations will be loaded
    pub ca_file: Option<String>,
    /// The maximum number of unique s2n configs to cache. Creating a new s2n config is an
    /// expensive operation, so we cache and re-use config objects with identical configurations.
    /// Defaults to a cache size of 10. A value of 0 disables the cache.
    ///
    /// WARNING: Disabling the s2n config cache can result in poor performance
    #[cfg(feature = "s2n")]
    pub s2n_config_cache_size: Option<usize>,
    /// The default client cert and key to use for mTLS
    ///
    /// Each individual connection can use their own cert key to override this.
    pub cert_key_file: Option<(String, String)>,
    /// When enabled allows TLS keys to be written to a file specified by the SSLKEYLOG
    /// env variable. This can be used by tools like Wireshark to decrypt traffic
    /// for debugging purposes.
    pub debug_ssl_keylog: bool,
    /// How many connections to keepalive
    pub keepalive_pool_size: usize,
    /// Optionally offload the connection establishment to dedicated thread pools
    ///
    /// TCP and TLS connection establishment can be CPU intensive. Sometimes such tasks can slow
    /// down the entire service, which causes timeouts which leads to more connections which
    /// snowballs the issue. Use this option to isolate these CPU intensive tasks from impacting
    /// other traffic.
    ///
    /// Syntax: (#pools, #thread in each pool)
    pub offload_threadpool: Option<(usize, usize)>,
    /// Bind to any of the given source IPv6 addresses
    pub bind_to_v4: Vec<SocketAddr>,
    /// Bind to any of the given source IPv4 addresses
    pub bind_to_v6: Vec<SocketAddr>,
}

impl ConnectorOptions {
    /// Derive the [ConnectorOptions] from a [ServerConf]
    pub fn from_server_conf(server_conf: &ServerConf) -> Self {
        // if both pools and threads are Some(>0)
        let offload_threadpool = server_conf
            .upstream_connect_offload_threadpools
            .zip(server_conf.upstream_connect_offload_thread_per_pool)
            .filter(|(pools, threads)| *pools > 0 && *threads > 0);

        // create SocketAddrs with port 0 for src addr bind

        let bind_to_v4 = server_conf
            .client_bind_to_ipv4
            .iter()
            .map(|v4| {
                let ip = v4.parse().unwrap();
                SocketAddr::new(ip, 0)
            })
            .collect();

        let bind_to_v6 = server_conf
            .client_bind_to_ipv6
            .iter()
            .map(|v6| {
                let ip = v6.parse().unwrap();
                SocketAddr::new(ip, 0)
            })
            .collect();
        ConnectorOptions {
            ca_file: server_conf.ca_file.clone(),
            cert_key_file: None, // TODO: use it
            #[cfg(feature = "s2n")]
            s2n_config_cache_size: server_conf.s2n_config_cache_size,
            debug_ssl_keylog: server_conf.upstream_debug_ssl_keylog,
            keepalive_pool_size: server_conf.upstream_keepalive_pool_size,
            offload_threadpool,
            bind_to_v4,
            bind_to_v6,
        }
    }

    /// Create a new [ConnectorOptions] with the given keepalive pool size
    pub fn new(keepalive_pool_size: usize) -> Self {
        ConnectorOptions {
            ca_file: None,
            #[cfg(feature = "s2n")]
            s2n_config_cache_size: None,
            cert_key_file: None,
            debug_ssl_keylog: false,
            keepalive_pool_size,
            offload_threadpool: None,
            bind_to_v4: vec![],
            bind_to_v6: vec![],
        }
    }
}

/// [TransportConnector] provides APIs to connect to servers via TCP or TLS with connection reuse
pub struct TransportConnector {
    tls_ctx: tls::Connector,
    connection_pool: Arc<ConnectionPool<Arc<Mutex<Stream>>>>,
    offload: Option<OffloadRuntime>,
    bind_to_v4: Vec<SocketAddr>,
    bind_to_v6: Vec<SocketAddr>,
    preferred_http_version: PreferredHttpVersion,
}

const DEFAULT_POOL_SIZE: usize = 128;

impl TransportConnector {
    /// Create a new [TransportConnector] with the given [ConnectorOptions]
    pub fn new(mut options: Option<ConnectorOptions>) -> Self {
        let pool_size = options
            .as_ref()
            .map_or(DEFAULT_POOL_SIZE, |c| c.keepalive_pool_size);
        // Take the offloading setting there because this layer has implement offloading,
        // so no need for stacks at lower layer to offload again.
        let offload = options.as_mut().and_then(|o| o.offload_threadpool.take());
        let bind_to_v4 = options
            .as_ref()
            .map_or_else(Vec::new, |o| o.bind_to_v4.clone());
        let bind_to_v6 = options
            .as_ref()
            .map_or_else(Vec::new, |o| o.bind_to_v6.clone());
        TransportConnector {
            tls_ctx: tls::Connector::new(options),
            connection_pool: Arc::new(ConnectionPool::new(pool_size)),
            offload: offload.map(|v| OffloadRuntime::new(v.0, v.1)),
            bind_to_v4,
            bind_to_v6,
            preferred_http_version: PreferredHttpVersion::new(),
        }
    }

    /// Connect to the given server [Peer]
    ///
    /// No connection is reused.
    pub async fn new_stream<P: Peer + Send + Sync + 'static>(&self, peer: &P) -> Result<Stream> {
        let rt = self
            .offload
            .as_ref()
            .map(|o| o.get_runtime(peer.reuse_hash()));
        let bind_to = l4::bind_to_random(peer, &self.bind_to_v4, &self.bind_to_v6);
        let alpn_override = self.preferred_http_version.get(peer);
        let stream = if let Some(rt) = rt {
            let peer = peer.clone();
            let tls_ctx = self.tls_ctx.clone();
            rt.spawn(async move { do_connect(&peer, bind_to, alpn_override, &tls_ctx.ctx).await })
                .await
                .or_err(InternalError, "offload runtime failure")??
        } else {
            do_connect(peer, bind_to, alpn_override, &self.tls_ctx.ctx).await?
        };

        Ok(stream)
    }

    /// Try to find a reusable connection to the given server [Peer]
    pub async fn reused_stream<P: Peer + Send + Sync>(&self, peer: &P) -> Option<Stream> {
        match self.connection_pool.get(&peer.reuse_hash()) {
            Some(s) => {
                debug!("find reusable stream, trying to acquire it");
                {
                    let _ = s.lock().await;
                } // wait for the idle poll to release it
                match Arc::try_unwrap(s) {
                    Ok(l) => {
                        let mut stream = l.into_inner();
                        // test_reusable_stream: we assume server would never actively send data
                        // first on an idle stream.
                        #[cfg(unix)]
                        if peer.matches_fd(stream.id()) && test_reusable_stream(&mut stream) {
                            Some(stream)
                        } else {
                            None
                        }
                        #[cfg(windows)]
                        {
                            use std::os::windows::io::{AsRawSocket, RawSocket};
                            struct WrappedRawSocket(RawSocket);
                            impl AsRawSocket for WrappedRawSocket {
                                fn as_raw_socket(&self) -> RawSocket {
                                    self.0
                                }
                            }
                            if peer.matches_sock(WrappedRawSocket(stream.id() as RawSocket))
                                && test_reusable_stream(&mut stream)
                            {
                                Some(stream)
                            } else {
                                None
                            }
                        }
                    }
                    Err(_) => {
                        error!("failed to acquire reusable stream");
                        None
                    }
                }
            }
            None => {
                debug!("No reusable connection found for {peer}");
                None
            }
        }
    }

    /// Return the [Stream] to the [TransportConnector] for connection reuse.
    ///
    /// Not all TCP/TLS connections can be reused. It is the caller's responsibility to make sure
    /// that protocol over the [Stream] supports connection reuse and the [Stream] itself is ready
    /// to be reused.
    ///
    /// If a [Stream] is dropped instead of being returned via this function. it will be closed.
    pub fn release_stream(
        &self,
        mut stream: Stream,
        key: u64, // usually peer.reuse_hash()
        idle_timeout: Option<std::time::Duration>,
    ) {
        if !test_reusable_stream(&mut stream) {
            return;
        }
        let id = stream.id();
        let meta = ConnectionMeta::new(key, id);
        debug!("Try to keepalive client session");
        let stream = Arc::new(Mutex::new(stream));
        let locked_stream = stream.clone().try_lock_owned().unwrap(); // safe as we just created it
        let (notify_close, watch_use) = self.connection_pool.put(&meta, stream);
        let pool = self.connection_pool.clone(); //clone the arc
        let rt = pingora_runtime::current_handle();
        rt.spawn(async move {
            pool.idle_poll(locked_stream, &meta, idle_timeout, notify_close, watch_use)
                .await;
        });
    }

    /// Get a stream to the given server [Peer]
    ///
    /// This function will try to find a reusable [Stream] first. If there is none, a new connection
    /// will be made to the server.
    ///
    /// The returned boolean will indicate whether the stream is reused.
    pub async fn get_stream<P: Peer + Send + Sync + 'static>(
        &self,
        peer: &P,
    ) -> Result<(Stream, bool)> {
        let reused_stream = self.reused_stream(peer).await;
        if let Some(s) = reused_stream {
            Ok((s, true))
        } else {
            let s = self.new_stream(peer).await?;
            Ok((s, false))
        }
    }

    /// Tell the connector to always send h1 for ALPN for the given peer in the future.
    pub fn prefer_h1(&self, peer: &impl Peer) {
        self.preferred_http_version.add(peer, 1);
    }
}

// Perform the actual L4 and tls connection steps while respecting the peer's
// connection timeout if there is one
async fn do_connect<P: Peer + Send + Sync>(
    peer: &P,
    bind_to: Option<BindTo>,
    alpn_override: Option<ALPN>,
    tls_ctx: &TlsConnector,
) -> Result<Stream> {
    // Create the future that does the connections, but don't evaluate it until
    // we decide if we need a timeout or not
    let connect_future = do_connect_inner(peer, bind_to, alpn_override, tls_ctx);

    match peer.total_connection_timeout() {
        Some(t) => match pingora_timeout::timeout(t, connect_future).await {
            Ok(res) => res,
            Err(_) => Error::e_explain(
                ConnectTimedout,
                format!("connecting to server {peer}, total-connection timeout {t:?}"),
            ),
        },
        None => connect_future.await,
    }
}

// Perform the actual L4 and tls connection steps with no timeout
async fn do_connect_inner<P: Peer + Send + Sync>(
    peer: &P,
    bind_to: Option<BindTo>,
    alpn_override: Option<ALPN>,
    tls_ctx: &TlsConnector,
) -> Result<Stream> {
    let stream = l4_connect(peer, bind_to).await?;
    if peer.tls() {
        let tls_stream = tls::connect(stream, peer, alpn_override, tls_ctx).await?;
        Ok(Box::new(tls_stream))
    } else {
        Ok(Box::new(stream))
    }
}

struct PreferredHttpVersion {
    // TODO: shard to avoid the global lock
    versions: RwLock<HashMap<u64, u8>>, // <hash of peer, version>
}

// TODO: limit the size of this

impl PreferredHttpVersion {
    pub fn new() -> Self {
        PreferredHttpVersion {
            versions: RwLock::default(),
        }
    }

    pub fn add(&self, peer: &impl Peer, version: u8) {
        let key = peer.reuse_hash();
        let mut v = self.versions.write();
        v.insert(key, version);
    }

    pub fn get(&self, peer: &impl Peer) -> Option<ALPN> {
        let key = peer.reuse_hash();
        let v = self.versions.read();
        v.get(&key)
            .copied()
            .map(|v| if v == 1 { ALPN::H1 } else { ALPN::H2H1 })
    }
}

use futures::future::FutureExt;
use tokio::io::AsyncReadExt;

/// Test whether a stream is already closed or not reusable (server sent unexpected data)
fn test_reusable_stream(stream: &mut Stream) -> bool {
    let mut buf = [0; 1];
    // tokio::task::unconstrained because now_or_never may yield None when the future is ready
    let result = tokio::task::unconstrained(stream.read(&mut buf[..])).now_or_never();
    if let Some(data_result) = result {
        match data_result {
            Ok(n) => {
                if n == 0 {
                    debug!("Idle connection is closed");
                } else {
                    warn!("Unexpected data read in idle connection");
                }
            }
            Err(e) => {
                debug!("Idle connection is broken: {e:?}");
            }
        }
        false
    } else {
        true
    }
}

/// Test utilities for creating mock acceptors.
#[cfg(all(test, unix))]
pub(crate) mod test_utils {
    use tokio::io::AsyncWriteExt;
    use tokio::net::UnixListener;

    /// Generates a unique socket path for testing to avoid conflicts when running in parallel
    pub fn unique_uds_path(test_name: &str) -> String {
        format!(
            "/tmp/test_{test_name}_{:?}_{}.sock",
            std::thread::current().id(),
            std::process::id()
        )
    }

    /// A mock UDS server that accepts one connection, sends data, and waits for shutdown signal
    ///
    /// Returns: (ready_rx, shutdown_tx, server_handle)
    /// - ready_rx: Wait on this to know when server is ready to accept connections
    /// - shutdown_tx: Send on this to tell server to shut down
    /// - server_handle: Join handle for the server task
    pub fn spawn_mock_uds_server(
        socket_path: String,
        response: &'static [u8],
    ) -> (
        tokio::sync::oneshot::Receiver<()>,
        tokio::sync::oneshot::Sender<()>,
        tokio::task::JoinHandle<()>,
    ) {
        let (ready_tx, ready_rx) = tokio::sync::oneshot::channel();
        let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel();

        let server_handle = tokio::spawn(async move {
            let _ = std::fs::remove_file(&socket_path);
            let listener = UnixListener::bind(&socket_path).unwrap();
            // Signal that the server is ready to accept connections
            let _ = ready_tx.send(());

            if let Ok((mut stream, _addr)) = listener.accept().await {
                let _ = stream.write_all(response).await;
                // Keep the connection open until the test tells us to shutdown
                let _ = shutdown_rx.await;
            }
            let _ = std::fs::remove_file(&socket_path);
        });

        (ready_rx, shutdown_tx, server_handle)
    }

    /// A mock UDS server that immediately closes connections (for testing error handling)
    ///
    /// Returns: (ready_rx, shutdown_tx, server_handle)
    pub fn spawn_mock_uds_server_close_immediate(
        socket_path: String,
    ) -> (
        tokio::sync::oneshot::Receiver<()>,
        tokio::sync::oneshot::Sender<()>,
        tokio::task::JoinHandle<()>,
    ) {
        let (ready_tx, ready_rx) = tokio::sync::oneshot::channel();
        let (shutdown_tx, shutdown_rx) = tokio::sync::oneshot::channel();

        let server_handle = tokio::spawn(async move {
            let _ = std::fs::remove_file(&socket_path);
            let listener = UnixListener::bind(&socket_path).unwrap();
            // Signal that the server is ready to accept connections
            let _ = ready_tx.send(());

            if let Ok((mut stream, _addr)) = listener.accept().await {
                let _ = stream.shutdown().await;
                // Wait for shutdown signal before cleaning up
                let _ = shutdown_rx.await;
            }
            let _ = std::fs::remove_file(&socket_path);
        });

        (ready_rx, shutdown_tx, server_handle)
    }
}

#[cfg(test)]
#[cfg(feature = "any_tls")]
mod tests {
    use pingora_error::ErrorType;
    use tls::Connector;

    use super::*;
    use crate::upstreams::peer::BasicPeer;

    // 192.0.2.1 is effectively a black hole
    const BLACK_HOLE: &str = "192.0.2.1:79";

    #[tokio::test]
    async fn test_connect() {
        let connector = TransportConnector::new(None);
        let peer = BasicPeer::new("1.1.1.1:80");
        // make a new connection to 1.1.1.1
        let stream = connector.new_stream(&peer).await.unwrap();
        connector.release_stream(stream, peer.reuse_hash(), None);

        let (_, reused) = connector.get_stream(&peer).await.unwrap();
        assert!(reused);
    }

    #[tokio::test]
    async fn test_connect_tls() {
        let connector = TransportConnector::new(None);
        let mut peer = BasicPeer::new("1.1.1.1:443");
        // BasicPeer will use tls when SNI is set
        peer.sni = "one.one.one.one".to_string();
        // make a new connection to https://1.1.1.1
        let stream = connector.new_stream(&peer).await.unwrap();
        connector.release_stream(stream, peer.reuse_hash(), None);

        let (_, reused) = connector.get_stream(&peer).await.unwrap();
        assert!(reused);
    }

    #[tokio::test(flavor = "multi_thread")]
    #[cfg(unix)]
    async fn test_connect_uds() {
        let socket_path = test_utils::unique_uds_path("transport_connector");
        let (ready_rx, shutdown_tx, server_handle) =
            test_utils::spawn_mock_uds_server(socket_path.clone(), b"it works!");

        // Wait for the server to be ready before connecting
        ready_rx.await.unwrap();

        // create a new service at /tmp
        let connector = TransportConnector::new(None);
        let peer = BasicPeer::new_uds(&socket_path).unwrap();
        // make a new connection to mock uds
        let mut stream = connector.new_stream(&peer).await.unwrap();
        let mut buf = [0; 9];
        let _ = stream.read(&mut buf).await.unwrap();
        assert_eq!(&buf, b"it works!");

        // Test connection reuse by releasing and getting the stream back
        connector.release_stream(stream, peer.reuse_hash(), None);
        let (stream, reused) = connector.get_stream(&peer).await.unwrap();
        assert!(reused);

        // Clean up: drop the stream, tell server to shutdown, and wait for it
        drop(stream);
        let _ = shutdown_tx.send(());
        server_handle.await.unwrap();
    }

    async fn do_test_conn_timeout(conf: Option<ConnectorOptions>) {
        let connector = TransportConnector::new(conf);
        let mut peer = BasicPeer::new(BLACK_HOLE);
        peer.options.connection_timeout = Some(std::time::Duration::from_millis(1));
        let stream = connector.new_stream(&peer).await;
        match stream {
            Ok(_) => panic!("should throw an error"),
            Err(e) => assert_eq!(e.etype(), &ConnectTimedout),
        }
    }

    #[tokio::test]
    async fn test_conn_timeout() {
        do_test_conn_timeout(None).await;
    }

    #[tokio::test]
    async fn test_conn_timeout_with_offload() {
        let mut conf = ConnectorOptions::new(8);
        conf.offload_threadpool = Some((2, 2));
        do_test_conn_timeout(Some(conf)).await;
    }

    #[tokio::test]
    async fn test_connector_bind_to() {
        // connect to remote while bind to localhost will fail
        let peer = BasicPeer::new("240.0.0.1:80");
        let mut conf = ConnectorOptions::new(1);
        conf.bind_to_v4.push("127.0.0.1:0".parse().unwrap());
        let connector = TransportConnector::new(Some(conf));

        let stream = connector.new_stream(&peer).await;
        let error = stream.unwrap_err();
        // XXX: some systems will allow the socket to bind and connect without error, only to timeout
        assert!(error.etype() == &ConnectError || error.etype() == &ConnectTimedout)
    }

    /// Helper function for testing error handling in the `do_connect` function.
    /// This assumes that the connection will fail to on the peer and returns
    /// the decomposed error type and message
    async fn get_do_connect_failure_with_peer(peer: &BasicPeer) -> (ErrorType, String) {
        let tls_connector = Connector::new(None);
        let stream = do_connect(peer, None, None, &tls_connector.ctx).await;
        match stream {
            Ok(_) => panic!("should throw an error"),
            Err(e) => (
                e.etype().clone(),
                e.context
                    .as_ref()
                    .map(|ctx| ctx.as_str().to_owned())
                    .unwrap_or_default(),
            ),
        }
    }

    #[tokio::test]
    async fn test_do_connect_with_total_timeout() {
        let mut peer = BasicPeer::new(BLACK_HOLE);
        peer.options.total_connection_timeout = Some(std::time::Duration::from_millis(1));
        let (etype, context) = get_do_connect_failure_with_peer(&peer).await;
        assert_eq!(etype, ConnectTimedout);
        assert!(context.contains("total-connection timeout"));
    }

    #[tokio::test]
    async fn test_tls_connect_timeout_supersedes_total() {
        let mut peer = BasicPeer::new(BLACK_HOLE);
        peer.options.total_connection_timeout = Some(std::time::Duration::from_millis(10));
        peer.options.connection_timeout = Some(std::time::Duration::from_millis(1));
        let (etype, context) = get_do_connect_failure_with_peer(&peer).await;
        assert_eq!(etype, ConnectTimedout);
        assert!(!context.contains("total-connection timeout"));
    }

    #[tokio::test]
    async fn test_do_connect_without_total_timeout() {
        let peer = BasicPeer::new(BLACK_HOLE);
        let (etype, context) = get_do_connect_failure_with_peer(&peer).await;
        assert!(etype != ConnectTimedout || !context.contains("total-connection timeout"));
    }
}


================================================
FILE: pingora-core/src/connectors/offload.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use log::debug;
use once_cell::sync::OnceCell;
use rand::Rng;
use tokio::runtime::{Builder, Handle};
use tokio::sync::oneshot::{channel, Sender};

// TODO: use pingora_runtime
// a shared runtime (thread pools)
pub(crate) struct OffloadRuntime {
    shards: usize,
    thread_per_shard: usize,
    // Lazily init the runtimes so that they are created after pingora
    // daemonize itself. Otherwise the runtime threads are lost.
    pools: OnceCell<Box<[(Handle, Sender<()>)]>>,
}

impl OffloadRuntime {
    pub fn new(shards: usize, thread_per_shard: usize) -> Self {
        assert!(shards != 0);
        assert!(thread_per_shard != 0);
        OffloadRuntime {
            shards,
            thread_per_shard,
            pools: OnceCell::new(),
        }
    }

    fn init_pools(&self) -> Box<[(Handle, Sender<()>)]> {
        let threads = self.shards * self.thread_per_shard;
        let mut pools = Vec::with_capacity(threads);
        for _ in 0..threads {
            // We use single thread runtimes to reduce the scheduling overhead of multithread
            // tokio runtime, which can be 50% of the on CPU time of the runtimes
            let rt = Builder::new_current_thread().enable_all().build().unwrap();
            let handler = rt.handle().clone();
            let (tx, rx) = channel::<()>();
            std::thread::Builder::new()
                .name("Offload thread".to_string())
                .spawn(move || {
                    debug!("Offload thread started");
                    // the thread that calls block_on() will drive the runtime
                    // rx will return when tx is dropped so this runtime and thread will exit
                    rt.block_on(rx)
                })
                .unwrap();
            pools.push((handler, tx));
        }

        pools.into_boxed_slice()
    }

    pub fn get_runtime(&self, hash: u64) -> &Handle {
        let mut rng = rand::thread_rng();

        // choose a shard based on hash and a random thread with in that shard
        // e.g. say thread_per_shard=2, shard 1 thread 1 is 1 * 2 + 1 = 3
        // [[th0, th1], [th2, th3], ...]
        let shard = hash as usize % self.shards;
        let thread_in_shard = rng.gen_range(0..self.thread_per_shard);
        let pools = self.pools.get_or_init(|| self.init_pools());
        &pools[shard * self.thread_per_shard + thread_in_shard].0
    }
}


================================================
FILE: pingora-core/src/connectors/tls/boringssl_openssl/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use log::debug;
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use std::sync::{Arc, Once};

use crate::connectors::tls::replace_leftmost_underscore;
use crate::connectors::ConnectorOptions;
use crate::protocols::tls::client::handshake;
use crate::protocols::tls::SslStream;
use crate::protocols::IO;
use crate::tls::ext::{
    add_host, clear_error_stack, ssl_add_chain_cert, ssl_set_groups_list,
    ssl_set_renegotiate_mode_freely, ssl_set_verify_cert_store, ssl_use_certificate,
    ssl_use_private_key, ssl_use_second_key_share,
};
#[cfg(feature = "boringssl")]
use crate::tls::ssl::SslCurve;
use crate::tls::ssl::{SslConnector, SslFiletype, SslMethod, SslVerifyMode, SslVersion};
use crate::tls::x509::store::X509StoreBuilder;
use crate::upstreams::peer::{Peer, ALPN};

pub type TlsConnector = SslConnector;

const CIPHER_LIST: &str = "AES-128-GCM-SHA256\
    :AES-256-GCM-SHA384\
    :CHACHA20-POLY1305-SHA256\
    :ECDHE-ECDSA-AES128-GCM-SHA256\
    :ECDHE-ECDSA-AES256-GCM-SHA384\
    :ECDHE-RSA-AES128-GCM-SHA256\
    :ECDHE-RSA-AES256-GCM-SHA384\
    :ECDHE-RSA-AES128-SHA\
    :ECDHE-RSA-AES256-SHA384\
    :AES128-GCM-SHA256\
    :AES256-GCM-SHA384\
    :AES128-SHA\
    :AES256-SHA\
    :DES-CBC3-SHA";

/**
 * Enabled signature algorithms for signing/verification (ECDSA).
 * As of 4/10/2023, the only addition to boringssl's defaults is ECDSA_SECP521R1_SHA512.
 */
const SIGALG_LIST: &str = "ECDSA_SECP256R1_SHA256\
    :RSA_PSS_RSAE_SHA256\
    :RSA_PKCS1_SHA256\
    :ECDSA_SECP384R1_SHA384\
    :RSA_PSS_RSAE_SHA384\
    :RSA_PKCS1_SHA384\
    :RSA_PSS_RSAE_SHA512\
    :RSA_PKCS1_SHA512\
    :RSA_PKCS1_SHA1\
    :ECDSA_SECP521R1_SHA512";
/**
 * Enabled curves for ECDHE (signature key exchange).
 * As of 4/10/2023, the only addition to boringssl's defaults is SECP521R1.
 *
 * N.B. The ordering of these curves is important. The boringssl library will select the first one
 * as a guess when negotiating a handshake with a server using TLSv1.3. We should opt for curves
 * that are both computationally cheaper and more supported.
 */
#[cfg(feature = "boringssl")]
const BORINGSSL_CURVE_LIST: &[SslCurve] = &[
    SslCurve::X25519,
    SslCurve::SECP256R1,
    SslCurve::SECP384R1,
    SslCurve::SECP521R1,
];

static INIT_CA_ENV: Once = Once::new();
fn init_ssl_cert_env_vars() {
    // this sets env vars to pick up the root certs
    // it is universal across openssl and boringssl
    // safety: although impossible to prove safe we assume it's safe since the call is
    // wrapped in a call_once and it's unlikely other threads are reading these vars
    INIT_CA_ENV.call_once(|| unsafe { openssl_probe::init_openssl_env_vars() });
}

#[derive(Clone)]
pub struct Connector {
    pub(crate) ctx: Arc<SslConnector>, // Arc to support clone
}

impl Connector {
    pub fn new(options: Option<ConnectorOptions>) -> Self {
        let mut builder = SslConnector::builder(SslMethod::tls()).unwrap();
        // TODO: make these conf
        // Set supported ciphers.
        builder.set_cipher_list(CIPHER_LIST).unwrap();
        // Set supported signature algorithms and ECDH (key exchange) curves.
        builder
            .set_sigalgs_list(&SIGALG_LIST.to_lowercase())
            .unwrap();
        #[cfg(feature = "boringssl")]
        builder.set_curves(BORINGSSL_CURVE_LIST).unwrap();
        builder
            .set_max_proto_version(Some(SslVersion::TLS1_3))
            .unwrap();
        builder
            .set_min_proto_version(Some(SslVersion::TLS1))
            .unwrap();
        if let Some(conf) = options.as_ref() {
            if let Some(ca_file_path) = conf.ca_file.as_ref() {
                builder.set_ca_file(ca_file_path).unwrap();
            } else {
                init_ssl_cert_env_vars();
                // load from default system wide trust location. (the name is misleading)
                builder.set_default_verify_paths().unwrap();
            }
            if let Some((cert, key)) = conf.cert_key_file.as_ref() {
                builder.set_certificate_chain_file(cert).unwrap();

                builder.set_private_key_file(key, SslFiletype::PEM).unwrap();
            }
            if conf.debug_ssl_keylog {
                // write TLS keys to file specified by SSLKEYLOGFILE if it exists
                if let Some(keylog) = std::env::var_os("SSLKEYLOGFILE").and_then(|path| {
                    std::fs::OpenOptions::new()
                        .append(true)
                        .create(true)
                        .open(path)
                        .ok()
                }) {
                    use std::io::Write;
                    builder.set_keylog_callback(move |_, line| {
                        let _ = writeln!(&keylog, "{}", line);
                    });
                }
            }
        } else {
            init_ssl_cert_env_vars();
            builder.set_default_verify_paths().unwrap();
        }

        Connector {
            ctx: Arc::new(builder.build()),
        }
    }
}

pub(crate) async fn connect<T, P>(
    stream: T,
    peer: &P,
    alpn_override: Option<ALPN>,
    tls_ctx: &SslConnector,
) -> Result<SslStream<T>>
where
    T: IO,
    P: Peer + Send + Sync,
{
    let mut ssl_conf = tls_ctx.configure().unwrap();

    ssl_set_renegotiate_mode_freely(&mut ssl_conf);

    // Set up CA/verify cert store
    // TODO: store X509Store in the peer directly
    if let Some(ca_list) = peer.get_ca() {
        let mut store_builder = X509StoreBuilder::new().unwrap();
        for ca in &***ca_list {
            store_builder.add_cert(ca.clone()).unwrap();
        }
        ssl_set_verify_cert_store(&mut ssl_conf, &store_builder.build())
            .or_err(InternalError, "failed to load cert store")?;
    }

    // Set up client cert/key
    if let Some(key_pair) = peer.get_client_cert_key() {
        debug!("setting client cert and key");
        ssl_use_certificate(&mut ssl_conf, key_pair.leaf())
            .or_err(InternalError, "invalid client cert")?;
        ssl_use_private_key(&mut ssl_conf, key_pair.key())
            .or_err(InternalError, "invalid client key")?;

        let intermediates = key_pair.intermediates();
        if !intermediates.is_empty() {
            debug!("adding intermediate certificates for mTLS chain");
            for int in intermediates {
                ssl_add_chain_cert(&mut ssl_conf, int)
                    .or_err(InternalError, "invalid intermediate client cert")?;
            }
        }
    }

    if let Some(curve) = peer.get_peer_options().and_then(|o| o.curves) {
        ssl_set_groups_list(&mut ssl_conf, curve).or_err(InternalError, "invalid curves")?;
    }

    // second_keyshare is default true
    if !peer.get_peer_options().is_none_or(|o| o.second_keyshare) {
        ssl_use_second_key_share(&mut ssl_conf, false);
    }

    // disable verification if sni does not exist
    // XXX: verify on empty string cause null string seg fault
    if peer.sni().is_empty() {
        ssl_conf.set_use_server_name_indication(false);
        /* NOTE: technically we can still verify who signs the cert but turn it off to be
        consistent with nginx's behavior */
        ssl_conf.set_verify(SslVerifyMode::NONE);
    } else if peer.verify_cert() {
        if peer.verify_hostname() {
            let verify_param = ssl_conf.param_mut();
            add_host(verify_param, peer.sni()).or_err(InternalError, "failed to add host")?;
            // if sni had underscores in leftmost label replace and add
            if let Some(sni_s) = replace_leftmost_underscore(peer.sni()) {
                add_host(verify_param, sni_s.as_ref()).unwrap();
            }
            if let Some(alt_cn) = peer.alternative_cn() {
                if !alt_cn.is_empty() {
                    add_host(verify_param, alt_cn).unwrap();
                    // if alt_cn had underscores in leftmost label replace and add
                    if let Some(alt_cn_s) = replace_leftmost_underscore(alt_cn) {
                        add_host(verify_param, alt_cn_s.as_ref()).unwrap();
                    }
                }
            }
        }
        ssl_conf.set_verify(SslVerifyMode::PEER);
    } else {
        ssl_conf.set_verify(SslVerifyMode::NONE);
    }

    /*
       We always set set_verify_hostname(false) here because:
        - verify case.)  otherwise ssl.connect calls X509_VERIFY_PARAM_set1_host
                         which overrides the names added by add_host. Verify is
                         essentially on as long as the names are added.
        - off case.)    the non verify hostname case should have it disabled
    */
    ssl_conf.set_verify_hostname(false);

    if let Some(alpn) = alpn_override.as_ref().or(peer.get_alpn()) {
        ssl_conf.set_alpn_protos(alpn.to_wire_preference()).unwrap();
    }

    clear_error_stack();

    let complete_hook = peer
        .get_peer_options()
        .and_then(|o| o.upstream_tls_handshake_complete_hook.clone());
    let connect_future = handshake(ssl_conf, peer.sni(), stream, complete_hook);

    match peer.connection_timeout() {
        Some(t) => match pingora_timeout::timeout(t, connect_future).await {
            Ok(res) => res,
            Err(_) => Error::e_explain(
                ConnectTimedout,
                format!("connecting to server {}, timeout {:?}", peer, t),
            ),
        },
        None => connect_future.await,
    }
}


================================================
FILE: pingora-core/src/connectors/tls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(feature = "openssl_derived")]
mod boringssl_openssl;

#[cfg(feature = "openssl_derived")]
pub use boringssl_openssl::*;

#[cfg(feature = "s2n")]
mod s2n;

#[cfg(feature = "s2n")]
pub use s2n::*;

#[cfg(feature = "rustls")]
mod rustls;

#[cfg(feature = "rustls")]
pub use rustls::*;

///    OpenSSL considers underscores in hostnames non-compliant.
///    We replace the underscore in the leftmost label as we must support these
///    hostnames for wildcard matches and we have not patched OpenSSL.
///
///    https://github.com/openssl/openssl/issues/12566
///
///    > The labels must follow the rules for ARPANET host names. They must
///    > start with a letter, end with a letter or digit, and have as interior
///    > characters only letters, digits, and hyphen.  There are also some
///    > restrictions on the length.  Labels must be 63 characters or less.
///    - https://datatracker.ietf.org/doc/html/rfc1034#section-3.5
#[cfg(feature = "any_tls")]
pub fn replace_leftmost_underscore(sni: &str) -> Option<String> {
    // wildcard is only leftmost label
    if let Some((leftmost, rest)) = sni.split_once('.') {
        // if not a subdomain or leftmost does not contain underscore return
        if !rest.contains('.') || !leftmost.contains('_') {
            return None;
        }
        // we have a subdomain, replace underscores
        let leftmost = leftmost.replace('_', "-");
        return Some(format!("{leftmost}.{rest}"));
    }
    None
}

#[cfg(feature = "any_tls")]
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_replace_leftmost_underscore() {
        let none_cases = [
            "",
            "some",
            "some.com",
            "1.1.1.1:5050",
            "dog.dot.com",
            "dog.d_t.com",
            "dog.dot.c_m",
            "d_g.com",
            "_",
            "dog.c_m",
        ];

        for case in none_cases {
            assert!(replace_leftmost_underscore(case).is_none(), "{}", case);
        }

        assert_eq!(
            Some("bb-b.some.com".to_string()),
            replace_leftmost_underscore("bb_b.some.com")
        );
        assert_eq!(
            Some("a-a-a.some.com".to_string()),
            replace_leftmost_underscore("a_a_a.some.com")
        );
        assert_eq!(
            Some("-.some.com".to_string()),
            replace_leftmost_underscore("_.some.com")
        );
    }
}


================================================
FILE: pingora-core/src/connectors/tls/rustls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use log::debug;
use pingora_error::{
    Error,
    ErrorType::{ConnectTimedout, InvalidCert},
    OrErr, Result,
};
use pingora_rustls::{
    load_ca_file_into_store, load_certs_and_key_files, load_platform_certs_incl_env_into_store,
    version, CertificateDer, CertificateError, ClientConfig as RusTlsClientConfig,
    DigitallySignedStruct, KeyLogFile, PrivateKeyDer, RootCertStore, RusTlsError, ServerName,
    SignatureScheme, TlsConnector as RusTlsConnector, UnixTime, WebPkiServerVerifier,
};

// Uses custom certificate verification from rustls's 'danger' module.
use pingora_rustls::{
    HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier as RusTlsServerCertVerifier,
};

use crate::protocols::tls::{client::handshake, TlsStream};
use crate::{connectors::ConnectorOptions, listeners::ALPN, protocols::IO, upstreams::peer::Peer};

use super::replace_leftmost_underscore;

#[derive(Clone)]
pub struct Connector {
    pub ctx: Arc<TlsConnector>,
}

impl Connector {
    /// Create a new connector based on the optional configurations. If no
    /// configurations are provided, no customized certificates or keys will be
    /// used
    pub fn new(config_opt: Option<ConnectorOptions>) -> Self {
        TlsConnector::build_connector(config_opt).unwrap()
    }
}

pub struct TlsConnector {
    config: Arc<RusTlsClientConfig>,
    ca_certs: Arc<RootCertStore>,
}

impl TlsConnector {
    pub(crate) fn build_connector(options: Option<ConnectorOptions>) -> Result<Connector>
    where
        Self: Sized,
    {
        // NOTE: Rustls only supports TLS 1.2 & 1.3

        // TODO: currently using Rustls defaults
        // - support SSLKEYLOGFILE
        // - set supported ciphers/algorithms/curves
        // - add options for CRL/OCSP validation

        let (ca_certs, certs_key) = {
            let mut ca_certs = RootCertStore::empty();
            let mut certs_key = None;

            if let Some(conf) = options.as_ref() {
                if let Some(ca_file_path) = conf.ca_file.as_ref() {
                    load_ca_file_into_store(ca_file_path, &mut ca_certs)?;
                } else {
                    load_platform_certs_incl_env_into_store(&mut ca_certs)?;
                }
                if let Some((cert, key)) = conf.cert_key_file.as_ref() {
                    certs_key = load_certs_and_key_files(cert, key)?;
                }
            } else {
                load_platform_certs_incl_env_into_store(&mut ca_certs)?;
            }

            (ca_certs, certs_key)
        };

        // TODO: WebPkiServerVerifier for CRL/OCSP validation
        let builder =
            RusTlsClientConfig::builder_with_protocol_versions(&[&version::TLS12, &version::TLS13])
                .with_root_certificates(ca_certs.clone());

        let mut config = match certs_key {
            Some((certs, key)) => {
                match builder.with_client_auth_cert(certs.clone(), key.clone_key()) {
                    Ok(config) => config,
                    Err(err) => {
                        // TODO: is there a viable alternative to the panic?
                        // falling back to no client auth... does not seem to be reasonable.
                        panic!("Failed to configure client auth cert/key. Error: {}", err);
                    }
                }
            }
            None => builder.with_no_client_auth(),
        };

        // Enable SSLKEYLOGFILE support for debugging TLS traffic
        if let Some(options) = options.as_ref() {
            if options.debug_ssl_keylog {
                config.key_log = Arc::new(KeyLogFile::new());
            }
        }

        Ok(Connector {
            ctx: Arc::new(TlsConnector {
                config: Arc::new(config),
                ca_certs: Arc::new(ca_certs),
            }),
        })
    }
}

pub async fn connect<T, P>(
    stream: T,
    peer: &P,
    alpn_override: Option<ALPN>,
    tls_ctx: &TlsConnector,
) -> Result<TlsStream<T>>
where
    T: IO,
    P: Peer + Send + Sync,
{
    let config = &tls_ctx.config;

    // TODO: setup CA/verify cert store from peer
    // peer.get_ca() returns None by default. It must be replaced by the
    // implementation of `peer`
    let key_pair = peer.get_client_cert_key();
    let mut updated_config_opt: Option<RusTlsClientConfig> = match key_pair {
        None => None,
        Some(key_arc) => {
            debug!("setting client cert and key");

            let mut cert_chain = vec![];
            debug!("adding leaf certificate to mTLS cert chain");
            cert_chain.push(key_arc.leaf());

            debug!("adding intermediate certificates to mTLS cert chain");
            key_arc
                .intermediates()
                .to_owned()
                .iter()
                .copied()
                .for_each(|i| cert_chain.push(i));

            let certs: Vec<CertificateDer> = cert_chain.into_iter().map(|c| c.into()).collect();
            let private_key: PrivateKeyDer =
                key_arc.key().as_slice().to_owned().try_into().unwrap();

            let builder = RusTlsClientConfig::builder_with_protocol_versions(&[
                &version::TLS12,
                &version::TLS13,
            ])
            .with_root_certificates(Arc::clone(&tls_ctx.ca_certs));
            debug!("added root ca certificates");

            let mut updated_config = builder.with_client_auth_cert(certs, private_key).or_err(
                InvalidCert,
                "Failed to use peer cert/key to update Rustls config",
            )?;
            // Preserve keylog setting from original config
            updated_config.key_log = Arc::clone(&config.key_log);
            Some(updated_config)
        }
    };

    if let Some(alpn) = alpn_override.as_ref().or(peer.get_alpn()) {
        let alpn_protocols = alpn.to_wire_protocols();
        if let Some(updated_config) = updated_config_opt.as_mut() {
            updated_config.alpn_protocols = alpn_protocols;
        } else {
            let mut updated_config = RusTlsClientConfig::clone(config);
            updated_config.alpn_protocols = alpn_protocols;
            updated_config_opt = Some(updated_config);
        }
    }

    let mut domain = peer.sni().to_string();

    if let Some(updated_config) = updated_config_opt.as_mut() {
        let verification_mode = if peer.sni().is_empty() {
            updated_config.enable_sni = false;
            /* NOTE: technically we can still verify who signs the cert but turn it off to be
            consistent with nginx's behavior */
            Some(VerificationMode::SkipAll) // disable verification if sni does not exist
        } else if !peer.verify_cert() {
            Some(VerificationMode::SkipAll)
        } else if !peer.verify_hostname() {
            Some(VerificationMode::SkipHostname)
        } else {
            // if sni had underscores in leftmost label replace and add
            if let Some(sni_s) = replace_leftmost_underscore(peer.sni()) {
                domain = sni_s;
            }
            None
            // to use the custom verifier for the full verify:
            // Some(VerificationMode::Full)
        };

        // Builds the custom_verifier when verification_mode is set.
        if let Some(mode) = verification_mode {
            let delegate = WebPkiServerVerifier::builder(Arc::clone(&tls_ctx.ca_certs))
                .build()
                .or_err(InvalidCert, "Failed to build WebPkiServerVerifier")?;

            let custom_verifier = Arc::new(CustomServerCertVerifier::new(delegate, mode));

            updated_config
                .dangerous()
                .set_certificate_verifier(custom_verifier);
        }
    }

    // TODO: curve setup from peer
    // - second key share from peer, currently only used in boringssl with PQ features

    // Patch config for dangerous verifier if needed, but only in test builds.
    #[cfg(test)]
    if !peer.verify_cert() || !peer.verify_hostname() {
        use crate::connectors::http::rustls_no_verify::apply_no_verify;
        if let Some(cfg) = updated_config_opt.as_mut() {
            apply_no_verify(cfg);
        } else {
            let mut tmp = RusTlsClientConfig::clone(config);
            apply_no_verify(&mut tmp);
            updated_config_opt = Some(tmp);
        }
    }

    let tls_conn = if let Some(cfg) = updated_config_opt {
        RusTlsConnector::from(Arc::new(cfg))
    } else {
        RusTlsConnector::from(Arc::clone(config))
    };

    let connect_future = handshake(&tls_conn, &domain, stream);

    match peer.connection_timeout() {
        Some(t) => match pingora_timeout::timeout(t, connect_future).await {
            Ok(res) => res,
            Err(_) => Error::e_explain(
                ConnectTimedout,
                format!("connecting to server {}, timeout {:?}", peer, t),
            ),
        },
        None => connect_future.await,
    }
}

#[allow(dead_code)]
#[derive(Debug)]
pub enum VerificationMode {
    SkipHostname,
    SkipAll,
    Full,
    // Note: "Full" Included for completeness, making this verifier self-contained
    // and explicit about all possible verification modes, not just exceptions.
}

#[derive(Debug)]
pub struct CustomServerCertVerifier {
    delegate: Arc<WebPkiServerVerifier>,
    verification_mode: VerificationMode,
}

impl CustomServerCertVerifier {
    pub fn new(delegate: Arc<WebPkiServerVerifier>, verification_mode: VerificationMode) -> Self {
        Self {
            delegate,
            verification_mode,
        }
    }
}

// CustomServerCertVerifier delegates TLS signature verification and allows 3 VerificationMode:
// Full: delegates all verification to the original WebPkiServerVerifier
// SkipHostname: same as "Full" but ignores "NotValidForName" certificate errors
// SkipAll: all certificate verification checks are skipped.
impl RusTlsServerCertVerifier for CustomServerCertVerifier {
    fn verify_server_cert(
        &self,
        _end_entity: &CertificateDer<'_>,
        _intermediates: &[CertificateDer<'_>],
        _server_name: &ServerName<'_>,
        _ocsp: &[u8],
        _now: UnixTime,
    ) -> Result<ServerCertVerified, RusTlsError> {
        match self.verification_mode {
            VerificationMode::Full => self.delegate.verify_server_cert(
                _end_entity,
                _intermediates,
                _server_name,
                _ocsp,
                _now,
            ),
            VerificationMode::SkipHostname => {
                match self.delegate.verify_server_cert(
                    _end_entity,
                    _intermediates,
                    _server_name,
                    _ocsp,
                    _now,
                ) {
                    Ok(scv) => Ok(scv),
                    Err(RusTlsError::InvalidCertificate(cert_error)) => {
                        if let CertificateError::NotValidForNameContext { .. } = cert_error {
                            Ok(ServerCertVerified::assertion())
                        } else {
                            Err(RusTlsError::InvalidCertificate(cert_error))
                        }
                    }
                    Err(e) => Err(e),
                }
            }
            VerificationMode::SkipAll => Ok(ServerCertVerified::assertion()),
        }
    }

    fn verify_tls12_signature(
        &self,
        message: &[u8],
        cert: &CertificateDer<'_>,
        dss: &DigitallySignedStruct,
    ) -> Result<HandshakeSignatureValid, RusTlsError> {
        self.delegate.verify_tls12_signature(message, cert, dss)
    }

    fn verify_tls13_signature(
        &self,
        message: &[u8],
        cert: &CertificateDer<'_>,
        dss: &DigitallySignedStruct,
    ) -> Result<HandshakeSignatureValid, RusTlsError> {
        self.delegate.verify_tls13_signature(message, cert, dss)
    }

    fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
        self.delegate.supported_verify_schemes()
    }
}


================================================
FILE: pingora-core/src/connectors/tls/s2n/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::hash::{Hash, Hasher};
use std::num::NonZero;
use std::sync::{Arc, Mutex};

use ahash::AHasher;
use lru::LruCache;
use pingora_error::{Error, Result};
use pingora_error::{ErrorType::*, OrErr};

use pingora_s2n::{
    load_pem_file, ClientAuthType, Config, IgnoreVerifyHostnameCallback,
    TlsConnector as S2NTlsConnector, DEFAULT_TLS13,
};

use crate::utils::tls::{CertKey, X509Pem};
use crate::{
    connectors::ConnectorOptions,
    listeners::ALPN,
    protocols::{
        tls::{client::handshake, S2NConnectionBuilder, TlsStream},
        IO,
    },
    upstreams::peer::Peer,
};

const DEFAULT_CONFIG_CACHE_SIZE: NonZero<usize> = NonZero::new(10).unwrap();

#[derive(Clone)]
pub struct Connector {
    pub ctx: TlsConnector,
}

impl Connector {
    /// Create a new connector based on the optional configurations. If no
    /// configurations are provided, no customized certificates or keys will be
    /// used
    pub fn new(options: Option<ConnectorOptions>) -> Self {
        Connector {
            ctx: TlsConnector::new(options),
        }
    }
}

/// Holds default options for configuring a TLS connection and an LRU cache for `s2n_config`.
///
/// In `s2n-tls`, each connection requires an associated `s2n_config`, which is expensive to create.
/// Although `s2n_config` objects can be cheaply cloned, they are immutable once built.
///
/// To avoid the overhead of constructing a new config for every connection, we maintain a cache
/// that stores previously built configs. Configs are retrieved from the cache based on the
/// configuration options used to create them.
#[derive(Clone)]
pub struct TlsConnector {
    config_cache: Option<Arc<Mutex<LruCache<u64, Config>>>>,
    options: Option<ConnectorOptions>,
}

impl TlsConnector {
    pub fn new(options: Option<ConnectorOptions>) -> Self {
        TlsConnector {
            config_cache: Self::create_config_cache(&options),
            options,
        }
    }

    /// Provided with a set of config options, either creates a new s2n config or
    /// fetches one from the LRU Cache.
    fn load_config(&self, config_options: S2NConfigOptions) -> Result<Config> {
        if self.config_cache.is_some() {
            let config_hash = config_options.config_hash();
            if let Some(config) = self.load_config_from_cache(config_hash) {
                return Ok(config);
            } else {
                let config = create_s2n_config(&self.options, config_options)?;
                self.put_config_in_cache(config_hash, config.clone());
                return Ok(config);
            }
        } else {
            create_s2n_config(&self.options, config_options)
        }
    }

    fn load_config_from_cache(&self, config_hash: u64) -> Option<Config> {
        if let Some(config_cache) = &self.config_cache {
            let mut cache = config_cache.lock().unwrap();
            cache.get(&config_hash).cloned()
        } else {
            None
        }
    }

    fn put_config_in_cache(&self, config_hash: u64, config: Config) {
        if let Some(config_cache) = &self.config_cache {
            let mut cache = config_cache.lock().unwrap();
            cache.put(config_hash, config);
        }
    }

    fn create_config_cache(
        options: &Option<ConnectorOptions>,
    ) -> Option<Arc<Mutex<LruCache<u64, Config>>>> {
        let mut cache_size = DEFAULT_CONFIG_CACHE_SIZE;
        if let Some(opts) = options {
            if let Some(cache_size_config) = opts.s2n_config_cache_size {
                if cache_size_config <= 0 {
                    return None;
                } else {
                    cache_size = NonZero::new(cache_size_config).unwrap();
                }
            }
        }
        return Some(Arc::new(Mutex::new(LruCache::new(cache_size))));
    }
}

pub(crate) async fn connect<T, P>(
    stream: T,
    peer: &P,
    alpn_override: Option<ALPN>,
    tls_ctx: &TlsConnector,
) -> Result<TlsStream<T>>
where
    T: IO,
    P: Peer + Send + Sync,
{
    // Default security policy with TLS 1.3 support
    // https://aws.github.io/s2n-tls/usage-guide/ch06-security-policies.html
    let security_policy = peer.get_s2n_security_policy().unwrap_or(&DEFAULT_TLS13);

    let config_options = S2NConfigOptions::from_peer(peer, alpn_override);
    let config = tls_ctx.load_config(config_options)?;

    let connection_builder = S2NConnectionBuilder {
        config: config,
        psk_config: peer.get_psk().cloned(),
        security_policy: Some(security_policy.clone()),
    };

    let domain = peer
        .alternative_cn()
        .map(|s| s.as_str())
        .unwrap_or(peer.sni());
    let connector = S2NTlsConnector::new(connection_builder);

    let connect_future = handshake(&connector, domain, stream);

    match peer.connection_timeout() {
        Some(t) => match pingora_timeout::timeout(t, connect_future).await {
            Ok(res) => res,
            Err(_) => Error::e_explain(
                ConnectTimedout,
                format!("connecting to server {}, timeout {:?}", peer, t),
            ),
        },
        None => connect_future.await,
    }
}

fn create_s2n_config(
    connector_options: &Option<ConnectorOptions>,
    config_options: S2NConfigOptions,
) -> Result<Config> {
    let mut builder = Config::builder();

    if let Some(conf) = connector_options.as_ref() {
        if let Some(ca_file_path) = conf.ca_file.as_ref() {
            let ca_pem = load_pem_file(&ca_file_path)?;
            builder
                .trust_pem(&ca_pem)
                .or_err(InternalError, "failed to load ca cert")?;
        }

        if let Some((cert_file, key_file)) = conf.cert_key_file.as_ref() {
            let cert = load_pem_file(cert_file)?;
            let key = load_pem_file(key_file)?;
            builder
                .load_pem(&cert, &key)
                .or_err(InternalError, "failed to load client cert")?;
            builder
                .set_client_auth_type(ClientAuthType::Required)
                .or_err(InternalError, "failed to load client key")?;
        }
    }

    if let Some(max_blinding_delay) = config_options.max_blinding_delay {
        builder
            .set_max_blinding_delay(max_blinding_delay)
            .or_err(InternalError, "failed to set max blinding delay")?;
    }

    if let Some(ca) = config_options.ca {
        builder
            .trust_pem(&ca.raw_pem)
            .or_err(InternalError, "invalid peer ca cert")?;
    }

    if let Some(client_cert_key) = config_options.client_cert_key {
        builder
            .load_pem(&client_cert_key.raw_pem(), &client_cert_key.key())
            .or_err(InternalError, "invalid peer client cert or key")?;
    }

    if let Some(alpn) = config_options.alpn {
        builder
            .set_application_protocol_preference(alpn.to_wire_protocols())
            .or_err(InternalError, "failed to set peer alpn")?;
    }

    if !config_options.verify_cert {
        // Disabling x509 verification is considered unsafe
        unsafe {
            builder
                .disable_x509_verification()
                .or_err(InternalError, "failed to disable certificate verification")?;
        }
    }

    if !config_options.verify_hostname {
        // Set verify hostname callback that always returns success
        builder
            .set_verify_host_callback(IgnoreVerifyHostnameCallback::new())
            .or_err(InternalError, "failed to disable hostname verification")?;
    }

    if !config_options.use_system_certs {
        builder.with_system_certs(false).or_err(
            InternalError,
            "failed to disable system certificate loading",
        )?;
    }

    Ok(builder
        .build()
        .or_err(InternalError, "failed to build s2n config")?)
}

#[derive(Clone)]
struct S2NConfigOptions {
    max_blinding_delay: Option<u32>,
    alpn: Option<ALPN>,
    verify_cert: bool,
    verify_hostname: bool,
    use_system_certs: bool,
    ca: Option<Arc<X509Pem>>,
    client_cert_key: Option<Arc<CertKey>>,
}

impl S2NConfigOptions {
    fn from_peer<P>(peer: &P, alpn_override: Option<ALPN>) -> Self
    where
        P: Peer + Send + Sync,
    {
        S2NConfigOptions {
            max_blinding_delay: peer.get_max_blinding_delay(),
            alpn: alpn_override.or(peer.get_alpn().cloned()),
            verify_cert: peer.verify_cert(),
            verify_hostname: peer.verify_hostname(),
            use_system_certs: peer.use_system_certs(),
            ca: peer.get_ca().cloned(),
            client_cert_key: peer.get_client_cert_key().cloned(),
        }
    }

    fn config_hash(&self) -> u64 {
        let mut hasher = AHasher::default();
        self.hash(&mut hasher);
        hasher.finish()
    }
}

impl Hash for S2NConfigOptions {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.max_blinding_delay.hash(state);
        self.alpn.hash(state);
        self.verify_cert.hash(state);
        self.verify_hostname.hash(state);
        self.use_system_certs.hash(state);
        self.ca.hash(state);
        self.client_cert_key.hash(state);
    }
}

#[cfg(test)]
mod tests {
    use std::{fs, sync::Arc};

    use crate::{
        connectors::tls::{s2n::S2NConfigOptions, TlsConnector},
        listeners::ALPN,
        utils::tls::{CertKey, X509Pem},
    };

    const CA_CERT_FILE: &str = "tests/certs/ca.crt";
    const ALT_CA_CERT_FILE: &str = "tests/certs/alt-ca.crt";

    const CERT_FILE: &str = "tests/certs/server.crt";
    const ALT_CERT_FILE: &str = "tests/certs/alt-server.crt";

    const KEY_FILE: &str = "tests/certs/server.key";

    fn read_file(file: &str) -> Vec<u8> {
        fs::read(file).unwrap()
    }

    fn load_pem_from_file(file: &str) -> X509Pem {
        X509Pem::new(read_file(file))
    }

    fn create_config_options() -> S2NConfigOptions {
        S2NConfigOptions {
            max_blinding_delay: Some(10),
            alpn: Some(ALPN::H1),
            verify_cert: true,
            verify_hostname: true,
            use_system_certs: true,
            ca: Some(Arc::new(load_pem_from_file(CA_CERT_FILE))),
            client_cert_key: Some(Arc::new(CertKey::new(
                read_file(CERT_FILE),
                read_file(KEY_FILE),
            ))),
        }
    }

    #[test]
    fn config_cache_hit_identical() {
        let connector = TlsConnector::new(None);
        let config_options = create_config_options();

        let config = connector.load_config(config_options.clone()).unwrap();
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_some());
        assert_eq!(config, cached_config.unwrap());
    }

    #[test]
    fn config_cache_miss_max_blinding_delay_changed() {
        let connector = TlsConnector::new(None);
        let mut config_options = create_config_options();

        let _config = connector.load_config(config_options.clone()).unwrap();
        config_options.max_blinding_delay = Some(20);
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_none());
    }

    #[test]
    fn config_cache_miss_alpn_changed() {
        let connector = TlsConnector::new(None);
        let mut config_options = create_config_options();

        let _config = connector.load_config(config_options.clone()).unwrap();
        config_options.alpn = Some(ALPN::H2H1);
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_none());
    }

    #[test]
    fn config_cache_miss_verify_cert_changed() {
        let connector = TlsConnector::new(None);
        let mut config_options = create_config_options();

        let _config = connector.load_config(config_options.clone()).unwrap();
        config_options.verify_cert = false;
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_none());
    }

    #[test]
    fn config_cache_miss_verify_hostname_changed() {
        let connector = TlsConnector::new(None);
        let mut config_options = create_config_options();

        let _config = connector.load_config(config_options.clone()).unwrap();
        config_options.verify_hostname = false;
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_none());
    }

    #[test]
    fn config_cache_miss_use_system_certs_changed() {
        let connector = TlsConnector::new(None);
        let mut config_options = create_config_options();

        let _config = connector.load_config(config_options.clone()).unwrap();
        config_options.use_system_certs = false;
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_none());
    }

    #[test]
    fn config_cache_miss_ca_changed() {
        let connector = TlsConnector::new(None);
        let mut config_options = create_config_options();

        let _config = connector.load_config(config_options.clone()).unwrap();
        config_options.ca = Some(Arc::new(load_pem_from_file(ALT_CA_CERT_FILE)));
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_none());
    }

    #[test]
    fn config_cache_miss_client_cert_key_changed() {
        let connector = TlsConnector::new(None);
        let mut config_options = create_config_options();

        let _config = connector.load_config(config_options.clone()).unwrap();
        config_options.client_cert_key = Some(Arc::new(CertKey::new(
            read_file(ALT_CERT_FILE),
            read_file(KEY_FILE),
        )));
        let cached_config = connector.load_config_from_cache(config_options.config_hash());

        assert!(cached_config.is_none());
    }
}


================================================
FILE: pingora-core/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![warn(clippy::all)]
#![allow(clippy::new_without_default)]
#![allow(clippy::type_complexity)]
#![allow(clippy::match_wild_err_arm)]
#![allow(clippy::missing_safety_doc)]
#![allow(clippy::upper_case_acronyms)]

//! # Pingora
//!
//! Pingora is a collection of service frameworks and network libraries battle-tested by the Internet.
//! It is to build robust, scalable and secure network infrastructures and services at Internet scale.
//!
//! # Features
//! - Http 1.x and Http 2
//! - Modern TLS with OpenSSL or BoringSSL (FIPS compatible)
//! - Zero downtime upgrade
//!
//! # Usage
//! This crate provides low level service and protocol implementation and abstraction.
//!
//! If looking to build a (reverse) proxy, see [`pingora-proxy`](https://docs.rs/pingora-proxy) crate.
//!
//! # Optional features
//!
//! ## TLS backends (mutually exclusive)
//! - `openssl`: Use OpenSSL as the TLS library (default if no TLS feature is specified)
//! - `boringssl`: Use BoringSSL as the TLS library (FIPS compatible)
//! - `rustls`: Use Rustls as the TLS library
//!
//! ## Additional features
//! - `connection_filter`: Enable early TCP connection filtering before TLS handshake.
//!   This allows implementing custom logic to accept/reject connections based on peer address
//!   with zero overhead when disabled.
//! - `sentry`: Enable Sentry error reporting integration
//! - `patched_http1`: Enable patched HTTP/1 parser
//!
//! # Connection Filtering
//!
//! With the `connection_filter` feature enabled, you can implement early connection filtering
//! at the TCP level, before any TLS handshake or HTTP processing occurs. This is useful for:
//! - IP-based access control
//! - Rate limiting at the connection level
//! - Geographic restrictions
//! - DDoS mitigation
//!
//! ## Example
//!
//! ```rust,ignore
//! # #[cfg(feature = "connection_filter")]
//! # {
//! use async_trait::async_trait;
//! use pingora_core::listeners::ConnectionFilter;
//! use std::net::SocketAddr;
//! use std::sync::Arc;
//!
//! #[derive(Debug)]
//! struct MyFilter;
//!
//! #[async_trait]
//! impl ConnectionFilter for MyFilter {
//!     async fn should_accept(&self, addr: &SocketAddr) -> bool {
//!         // Custom logic to filter connections
//!         !is_blocked_ip(addr.ip())
//!     }
//! }
//!
//! // Apply the filter to a service
//! let mut service = my_service();
//! service.set_connection_filter(Arc::new(MyFilter));
//! # }
//! ```
//!
//! When the `connection_filter` feature is disabled, the filter API remains available
//! but becomes a no-op, ensuring zero overhead for users who don't need this functionality.

// This enables the feature that labels modules that are only available with
// certain pingora features
#![cfg_attr(docsrs, feature(doc_cfg))]

pub mod apps;
pub mod connectors;
pub mod listeners;
pub mod modules;
pub mod protocols;
pub mod server;
pub mod services;
pub mod upstreams;
pub mod utils;

pub use pingora_error::{ErrorType::*, *};

// If both openssl and boringssl are enabled, prefer boringssl.
// This is to make sure that boringssl can override the default openssl feature
// when this crate is used indirectly by other crates.
#[cfg(feature = "boringssl")]
pub use pingora_boringssl as tls;

#[cfg(feature = "openssl")]
pub use pingora_openssl as tls;

#[cfg(feature = "rustls")]
pub use pingora_rustls as tls;

#[cfg(feature = "s2n")]
pub use pingora_s2n as tls;

#[cfg(not(feature = "any_tls"))]
pub use protocols::tls::noop_tls as tls;

pub mod prelude {
    pub use crate::server::configuration::Opt;
    pub use crate::server::Server;
    pub use crate::services::background::background_service;
    pub use crate::upstreams::peer::HttpPeer;
    pub use pingora_error::{ErrorType::*, *};
}


================================================
FILE: pingora-core/src/listeners/connection_filter.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Connection filtering trait for early connection filtering
//!
//! This module provides the [`ConnectionFilter`] trait which allows filtering
//! incoming connections at the TCP level, before the TLS handshake occurs.
//!
//! # Feature Flag
//!
//! This functionality requires the `connection_filter` feature to be enabled:
//! ```toml
//! [dependencies]
//! pingora-core = { version = "0.5", features = ["connection_filter"] }
//! ```
//!
//! When the feature is disabled, a no-op implementation is provided for API compatibility.

use async_trait::async_trait;
use std::fmt::Debug;
use std::net::SocketAddr;

/// A trait for filtering incoming connections at the TCP level.
///
/// Implementations of this trait can inspect the peer address of incoming
/// connections and decide whether to accept or reject them before any
/// further processing (including TLS handshake) occurs.
///
/// # Example
///
/// ```rust,no_run
/// use async_trait::async_trait;
/// use pingora_core::listeners::ConnectionFilter;
/// use std::net::{IpAddr, Ipv4Addr, SocketAddr};
///
/// #[derive(Debug)]
/// struct BlocklistFilter {
///     blocked_ips: Vec<IpAddr>,
/// }
///
/// #[async_trait]
/// impl ConnectionFilter for BlocklistFilter {
///     async fn should_accept(&self, addr: &SocketAddr) -> bool {
///         !self.blocked_ips.contains(&addr.ip())
///     }
/// }
/// ```
///
/// # Performance Considerations
///
/// This filter is called for every incoming connection, so implementations
/// should be efficient. Consider caching or pre-computing data structures
/// for IP filtering rather than doing expensive operations per connection.
#[async_trait]
pub trait ConnectionFilter: Debug + Send + Sync {
    /// Determines whether an incoming connection should be accepted.
    ///
    /// This method is called after a TCP connection is accepted but before
    /// any further processing (including TLS handshake).
    ///
    /// # Arguments
    ///
    /// * `addr` - The socket address of the incoming connection
    ///
    /// # Returns
    ///
    /// * `true` - Accept the connection and continue processing
    /// * `false` - Drop the connection immediately
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// async fn should_accept(&self, addr: &SocketAddr) -> bool {
    ///     // Accept only connections from private IP ranges
    ///     match addr.ip() {
    ///         IpAddr::V4(ip) => ip.is_private(),
    ///         IpAddr::V6(_) => true,
    ///     }
    /// }
    ///
    async fn should_accept(&self, _addr: Option<&SocketAddr>) -> bool {
        true
    }
}

/// Default implementation that accepts all connections.
///
/// This filter accepts all incoming connections without any filtering.
/// It's used as the default when no custom filter is specified.
#[derive(Debug, Clone)]
pub struct AcceptAllFilter;

#[async_trait]
impl ConnectionFilter for AcceptAllFilter {
    // Uses default implementation
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{IpAddr, Ipv4Addr};

    #[derive(Debug, Clone)]
    struct BlockListFilter {
        blocked_ips: Vec<IpAddr>,
    }

    #[async_trait]
    impl ConnectionFilter for BlockListFilter {
        async fn should_accept(&self, addr_opt: Option<&SocketAddr>) -> bool {
            addr_opt
                .map(|addr| !self.blocked_ips.contains(&addr.ip()))
                .unwrap_or(true)
        }
    }

    #[tokio::test]
    async fn test_accept_all_filter() {
        let filter = AcceptAllFilter;
        let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), 8080);
        assert!(filter.should_accept(Some(&addr)).await);
    }

    #[tokio::test]
    async fn test_blocklist_filter() {
        let filter = BlockListFilter {
            blocked_ips: vec![IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1))],
        };

        let blocked_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1)), 8080);
        let allowed_addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(192, 168, 1, 2)), 8080);

        assert!(!filter.should_accept(Some(&blocked_addr)).await);
        assert!(filter.should_accept(Some(&allowed_addr)).await);
    }
}


================================================
FILE: pingora-core/src/listeners/l4.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(feature = "connection_filter")]
use log::debug;
use log::warn;
use pingora_error::{
    ErrorType::{AcceptError, BindError},
    OrErr, Result,
};
use std::io::ErrorKind;
use std::net::{SocketAddr, ToSocketAddrs};
#[cfg(unix)]
use std::os::unix::io::{AsRawFd, FromRawFd};
#[cfg(unix)]
use std::os::unix::net::UnixListener as StdUnixListener;
#[cfg(windows)]
use std::os::windows::io::{AsRawSocket, FromRawSocket};
use std::time::Duration;
use std::{fs::Permissions, sync::Arc};
use tokio::net::TcpSocket;

#[cfg(feature = "connection_filter")]
use super::connection_filter::ConnectionFilter;
#[cfg(feature = "connection_filter")]
use crate::listeners::AcceptAllFilter;

use crate::protocols::l4::ext::{set_dscp, set_recv_buf, set_snd_buf, set_tcp_fastopen_backlog};
use crate::protocols::l4::listener::Listener;
pub use crate::protocols::l4::stream::Stream;
#[cfg(feature = "connection_filter")]
use crate::protocols::GetSocketDigest;
use crate::protocols::TcpKeepalive;
#[cfg(unix)]
use crate::server::ListenFds;

const TCP_LISTENER_MAX_TRY: usize = 30;
const TCP_LISTENER_TRY_STEP: Duration = Duration::from_secs(1);
// TODO: configurable backlog
const LISTENER_BACKLOG: u32 = 65535;

/// Address for listening server, either TCP/UDS socket.
#[derive(Clone, Debug)]
pub enum ServerAddress {
    Tcp(String, Option<TcpSocketOptions>),
    #[cfg(unix)]
    Uds(String, Option<Permissions>),
}

impl AsRef<str> for ServerAddress {
    fn as_ref(&self) -> &str {
        match &self {
            Self::Tcp(l, _) => l,
            #[cfg(unix)]
            Self::Uds(l, _) => l,
        }
    }
}

impl ServerAddress {
    fn tcp_sock_opts(&self) -> Option<&TcpSocketOptions> {
        match &self {
            Self::Tcp(_, op) => op.into(),
            _ => None,
        }
    }
}

/// TCP socket configuration options, this is used for setting options on
/// listening sockets and accepted connections.
#[non_exhaustive]
#[derive(Clone, Debug, Default)]
pub struct TcpSocketOptions {
    /// IPV6_V6ONLY flag (if true, limit socket to IPv6 communication only).
    /// This is mostly useful when binding to `[::]`, which on most Unix distributions
    /// will bind to both IPv4 and IPv6 addresses by default.
    pub ipv6_only: Option<bool>,
    /// Enable TCP fast open and set the backlog size of it.
    /// See the [man page](https://man7.org/linux/man-pages/man7/tcp.7.html) for more information.
    pub tcp_fastopen: Option<usize>,
    /// Enable TCP keepalive on accepted connections.
    /// See the [man page](https://man7.org/linux/man-pages/man7/tcp.7.html) for more information.
    pub tcp_keepalive: Option<TcpKeepalive>,
    /// Specifies the server should set the following DSCP value on outgoing connections.
    /// See the [RFC](https://datatracker.ietf.org/doc/html/rfc2474) for more details.
    pub dscp: Option<u8>,
    /// Enable SO_REUSEPORT to allow multiple sockets to bind to the same address and port.
    /// This is useful for load balancing across multiple worker processes.
    /// See the [man page](https://man7.org/linux/man-pages/man7/socket.7.html) for more information.
    pub so_reuseport: Option<bool>,
    /// Set the send buffer size for accepted connections. See
    /// [SO_SNDBUF](https://man7.org/linux/man-pages/man7/socket.7.html).
    pub tcp_snd_buf: Option<usize>,
    /// Set the receive buffer size for accepted connections. See
    /// [SO_RCVBUF](https://man7.org/linux/man-pages/man7/socket.7.html).
    pub tcp_recv_buf: Option<usize>,
    // TODO: allow configuring reuseaddr, backlog, etc. from here?
}

#[cfg(unix)]
mod uds {
    use super::{OrErr, Result};
    use crate::protocols::l4::listener::Listener;
    use log::{debug, error};
    use pingora_error::ErrorType::BindError;
    use std::fs::{self, Permissions};
    use std::io::ErrorKind;
    use std::os::unix::fs::PermissionsExt;
    use std::os::unix::net::UnixListener as StdUnixListener;
    use tokio::net::UnixListener;

    use super::LISTENER_BACKLOG;

    pub(super) fn set_perms(path: &str, perms: Option<Permissions>) -> Result<()> {
        // set read/write permissions for all users on the socket by default
        let perms = perms.unwrap_or(Permissions::from_mode(0o666));
        fs::set_permissions(path, perms).or_err_with(BindError, || {
            format!("Fail to bind to {path}, could not set permissions")
        })
    }

    pub(super) fn set_backlog(l: StdUnixListener, backlog: u32) -> Result<UnixListener> {
        let socket: socket2::Socket = l.into();
        // Note that we call listen on an already listening socket
        // POSIX undefined but on Linux it will update the backlog size
        socket
            .listen(backlog as i32)
            .or_err_with(BindError, || format!("listen() failed on {socket:?}"))?;
        UnixListener::from_std(socket.into()).or_err(BindError, "Failed to convert to tokio socket")
    }

    pub(super) fn bind(addr: &str, perms: Option<Permissions>) -> Result<Listener> {
        /*
          We remove the filename/address in case there is a dangling reference.

          "Binding to a socket with a filename creates a socket in the
          filesystem that must be deleted by the caller when it is no
          longer needed (using unlink(2))"
        */
        match std::fs::remove_file(addr) {
            Ok(()) => {
                debug!("unlink {addr} done");
            }
            Err(e) => match e.kind() {
                ErrorKind::NotFound => debug!("unlink {addr} not found: {e}"),
                _ => error!("unlink {addr} failed: {e}"),
            },
        }
        let listener_socket = UnixListener::bind(addr)
            .or_err_with(BindError, || format!("Bind() failed on {addr}"))?;
        set_perms(addr, perms)?;
        let std_listener = listener_socket.into_std().unwrap();
        Ok(set_backlog(std_listener, LISTENER_BACKLOG)?.into())
    }
}

// currently, these options can only apply on sockets prior to calling bind()
fn apply_tcp_socket_options(sock: &TcpSocket, opt: Option<&TcpSocketOptions>) -> Result<()> {
    let Some(opt) = opt else {
        return Ok(());
    };

    let socket_ref = socket2::SockRef::from(sock);

    if let Some(ipv6_only) = opt.ipv6_only {
        socket_ref
            .set_only_v6(ipv6_only)
            .or_err(BindError, "failed to set IPV6_V6ONLY")?;
    }

    #[cfg(unix)]
    if let Some(reuseport) = opt.so_reuseport {
        socket_ref
            .set_reuse_port(reuseport)
            .or_err(BindError, "failed to set SO_REUSEPORT")?;
    }

    #[cfg(unix)]
    let raw = sock.as_raw_fd();
    #[cfg(windows)]
    let raw = sock.as_raw_socket();

    if let Some(backlog) = opt.tcp_fastopen {
        set_tcp_fastopen_backlog(raw, backlog)?;
    }

    if let Some(dscp) = opt.dscp {
        set_dscp(raw, dscp)?;
    }
    Ok(())
}

fn from_raw_fd(address: &ServerAddress, fd: i32) -> Result<Listener> {
    match address {
        #[cfg(unix)]
        ServerAddress::Uds(addr, perm) => {
            let std_listener = unsafe { StdUnixListener::from_raw_fd(fd) };
            // set permissions just in case
            uds::set_perms(addr, perm.clone())?;
            Ok(uds::set_backlog(std_listener, LISTENER_BACKLOG)?.into())
        }
        ServerAddress::Tcp(_, _) => {
            #[cfg(unix)]
            let std_listener_socket = unsafe { std::net::TcpStream::from_raw_fd(fd) };
            #[cfg(windows)]
            let std_listener_socket = unsafe { std::net::TcpStream::from_raw_socket(fd as u64) };
            let listener_socket = TcpSocket::from_std_stream(std_listener_socket);
            // Note that we call listen on an already listening socket
            // POSIX undefined but on Linux it will update the backlog size
            Ok(listener_socket
                .listen(LISTENER_BACKLOG)
                .or_err_with(BindError, || format!("Listen() failed on {address:?}"))?
                .into())
        }
    }
}

async fn bind_tcp(addr: &str, opt: Option<TcpSocketOptions>) -> Result<Listener> {
    let mut try_count = 0;
    loop {
        let sock_addr = addr
            .to_socket_addrs() // NOTE: this could invoke a blocking network lookup
            .or_err_with(BindError, || format!("Invalid listen address {addr}"))?
            .next() // take the first one for now
            .unwrap(); // assume there is always at least one

        let listener_socket = match sock_addr {
            SocketAddr::V4(_) => TcpSocket::new_v4(),
            SocketAddr::V6(_) => TcpSocket::new_v6(),
        }
        .or_err_with(BindError, || format!("fail to create address {sock_addr}"))?;

        // NOTE: this is to preserve the current TcpListener::bind() behavior.
        // We have a few tests relying on this behavior to allow multiple identical
        // test servers to coexist.
        listener_socket
            .set_reuseaddr(true)
            .or_err(BindError, "fail to set_reuseaddr(true)")?;

        apply_tcp_socket_options(&listener_socket, opt.as_ref())?;

        match listener_socket.bind(sock_addr) {
            Ok(()) => {
                break Ok(listener_socket
                    .listen(LISTENER_BACKLOG)
                    .or_err(BindError, "bind() failed")?
                    .into())
            }
            Err(e) => {
                if e.kind() != ErrorKind::AddrInUse {
                    break Err(e).or_err_with(BindError, || format!("bind() failed on {addr}"));
                }
                try_count += 1;
                if try_count >= TCP_LISTENER_MAX_TRY {
                    break Err(e).or_err_with(BindError, || {
                        format!("bind() failed, after retries, {addr} still in use")
                    });
                }
                warn!("{addr} is in use, will try again");
                tokio::time::sleep(TCP_LISTENER_TRY_STEP).await;
            }
        }
    }
}

async fn bind(addr: &ServerAddress) -> Result<Listener> {
    match addr {
        #[cfg(unix)]
        ServerAddress::Uds(l, perm) => uds::bind(l, perm.clone()),
        ServerAddress::Tcp(l, opt) => bind_tcp(l, opt.clone()).await,
    }
}

#[derive(Clone, Debug)]
pub struct ListenerEndpoint {
    listen_addr: ServerAddress,
    listener: Arc<Listener>,
    #[cfg(feature = "connection_filter")]
    connection_filter: Arc<dyn ConnectionFilter>,
}

#[derive(Default)]
pub struct ListenerEndpointBuilder {
    listen_addr: Option<ServerAddress>,
    #[cfg(feature = "connection_filter")]
    connection_filter: Option<Arc<dyn ConnectionFilter>>,
}

impl ListenerEndpointBuilder {
    pub fn new() -> ListenerEndpointBuilder {
        Self {
            listen_addr: None,
            #[cfg(feature = "connection_filter")]
            connection_filter: None,
        }
    }

    pub fn listen_addr(&mut self, addr: ServerAddress) -> &mut Self {
        self.listen_addr = Some(addr);
        self
    }

    #[cfg(feature = "connection_filter")]
    pub fn connection_filter(&mut self, filter: Arc<dyn ConnectionFilter>) -> &mut Self {
        self.connection_filter = Some(filter);
        self
    }

    #[cfg(unix)]
    pub async fn listen(self, fds: Option<ListenFds>) -> Result<ListenerEndpoint> {
        let listen_addr = self
            .listen_addr
            .expect("Tried to listen with no addr specified");

        let listener = if let Some(fds_table) = fds {
            let addr_str = listen_addr.as_ref();

            // consider make this mutex std::sync::Mutex or OnceCell
            let mut table = fds_table.lock().await;

            if let Some(fd) = table.get(addr_str) {
                from_raw_fd(&listen_addr, *fd)?
            } else {
                // not found
                let listener = bind(&listen_addr).await?;
                table.add(addr_str.to_string(), listener.as_raw_fd());
                listener
            }
        } else {
            // not found, no fd table
            bind(&listen_addr).await?
        };

        #[cfg(feature = "connection_filter")]
        let connection_filter = self
            .connection_filter
            .unwrap_or_else(|| Arc::new(AcceptAllFilter));

        Ok(ListenerEndpoint {
            listen_addr,
            listener: Arc::new(listener),
            #[cfg(feature = "connection_filter")]
            connection_filter,
        })
    }

    #[cfg(windows)]
    pub async fn listen(self) -> Result<ListenerEndpoint> {
        let listen_addr = self
            .listen_addr
            .expect("Tried to listen with no addr specified");

        let listener = bind(&listen_addr).await?;

        #[cfg(feature = "connection_filter")]
        let connection_filter = self
            .connection_filter
            .unwrap_or_else(|| Arc::new(AcceptAllFilter));

        Ok(ListenerEndpoint {
            listen_addr,
            listener: Arc::new(listener),
            #[cfg(feature = "connection_filter")]
            connection_filter,
        })
    }
}

impl ListenerEndpoint {
    pub fn builder() -> ListenerEndpointBuilder {
        ListenerEndpointBuilder::new()
    }

    pub fn as_str(&self) -> &str {
        self.listen_addr.as_ref()
    }

    fn apply_stream_settings(&self, stream: &mut Stream) -> Result<()> {
        // settings are applied based on whether the underlying stream supports it
        stream.set_nodelay()?;
        let Some(op) = self.listen_addr.tcp_sock_opts() else {
            return Ok(());
        };
        if let Some(ka) = op.tcp_keepalive.as_ref() {
            stream.set_keepalive(ka)?;
        }
        if let Some(dscp) = op.dscp {
            #[cfg(unix)]
            set_dscp(stream.as_raw_fd(), dscp)?;
            #[cfg(windows)]
            set_dscp(stream.as_raw_socket(), dscp)?;
        }
        if let Some(snd_buf) = op.tcp_snd_buf {
            #[cfg(unix)]
            set_snd_buf(stream.as_raw_fd(), snd_buf)?;
            #[cfg(windows)]
            set_snd_buf(stream.as_raw_socket(), snd_buf)?;
        }
        if let Some(recv_buf) = op.tcp_recv_buf {
            #[cfg(unix)]
            set_recv_buf(stream.as_raw_fd(), recv_buf)?;
            #[cfg(windows)]
            set_recv_buf(stream.as_raw_socket(), recv_buf)?;
        }
        Ok(())
    }

    pub async fn accept(&self) -> Result<Stream> {
        #[cfg(feature = "connection_filter")]
        {
            loop {
                let mut stream = self
                    .listener
                    .accept()
                    .await
                    .or_err(AcceptError, "Fail to accept()")?;

                // Performance: nested if-let avoids cloning/allocations on each connection accept
                let should_accept = if let Some(digest) = stream.get_socket_digest() {
                    if let Some(peer_addr) = digest.peer_addr() {
                        self.connection_filter
                            .should_accept(peer_addr.as_inet())
                            .await
                    } else {
                        // No peer address available - accept by default
                        true
                    }
                } else {
                    // No socket digest available - accept by default
                    true
                };

                if !should_accept {
                    debug!("Connection rejected by filter");
                    drop(stream);
                    continue;
                }

                self.apply_stream_settings(&mut stream)?;
                return Ok(stream);
            }
        }
        #[cfg(not(feature = "connection_filter"))]
        {
            let mut stream = self
                .listener
                .accept()
                .await
                .or_err(AcceptError, "Fail to accept()")?;
            self.apply_stream_settings(&mut stream)?;
            Ok(stream)
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[tokio::test]
    async fn test_listen_tcp() {
        let addr = "127.0.0.1:7100";

        let mut builder = ListenerEndpoint::builder();

        builder.listen_addr(ServerAddress::Tcp(addr.into(), None));

        #[cfg(unix)]
        let listener = builder.listen(None).await.unwrap();

        #[cfg(windows)]
        let listener = builder.listen().await.unwrap();

        tokio::spawn(async move {
            // just try to accept once
            listener.accept().await.unwrap();
        });
        tokio::net::TcpStream::connect(addr)
            .await
            .expect("can connect to TCP listener");
    }

    #[tokio::test]
    async fn test_listen_tcp_ipv6_only() {
        let sock_opt = Some(TcpSocketOptions {
            ipv6_only: Some(true),
            ..Default::default()
        });

        let mut builder = ListenerEndpoint::builder();

        builder.listen_addr(ServerAddress::Tcp("[::]:7101".into(), sock_opt));

        #[cfg(unix)]
        let listener = builder.listen(None).await.unwrap();

        #[cfg(windows)]
        let listener = builder.listen().await.unwrap();

        tokio::spawn(async move {
            // just try to accept twice
            listener.accept().await.unwrap();
            listener.accept().await.unwrap();
        });
        tokio::net::TcpStream::connect("127.0.0.1:7101")
            .await
            .expect_err("cannot connect to v4 addr");
        tokio::net::TcpStream::connect("[::1]:7101")
            .await
            .expect("can connect to v6 addr");
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_listen_uds() {
        let addr = "/tmp/test_listen_uds";

        let mut builder = ListenerEndpoint::builder();

        builder.listen_addr(ServerAddress::Uds(addr.into(), None));

        let listener = builder.listen(None).await.unwrap();

        tokio::spawn(async move {
            // just try to accept once
            listener.accept().await.unwrap();
        });
        tokio::net::UnixStream::connect(addr)
            .await
            .expect("can connect to UDS listener");
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn test_tcp_so_reuseport() {
        let addr = "127.0.0.1:7201";
        let sock_opt = TcpSocketOptions {
            so_reuseport: Some(true),
            ..Default::default()
        };

        // Create first listener with SO_REUSEPORT
        let mut builder1 = ListenerEndpoint::builder();
        builder1.listen_addr(ServerAddress::Tcp(addr.into(), Some(sock_opt.clone())));
        let listener1 = builder1.listen(None).await.unwrap();

        // Create second listener with the same address and SO_REUSEPORT
        // This should succeed because SO_REUSEPORT is enabled
        let mut builder2 = ListenerEndpoint::builder();
        builder2.listen_addr(ServerAddress::Tcp(addr.into(), Some(sock_opt)));
        let listener2 = builder2.listen(None).await.unwrap();

        // Both listeners should be able to bind to the same address
        assert_eq!(listener1.as_str(), addr);
        assert_eq!(listener2.as_str(), addr);
    }

    #[tokio::test]
    async fn test_tcp_so_reuseport_false() {
        let addr = "127.0.0.1:7202";
        let sock_opt_no_reuseport = TcpSocketOptions {
            so_reuseport: Some(false), // Explicitly disable SO_REUSEPORT
            ..Default::default()
        };

        // Create first listener without SO_REUSEPORT
        let mut builder1 = ListenerEndpoint::builder();
        builder1.listen_addr(ServerAddress::Tcp(
            addr.into(),
            Some(sock_opt_no_reuseport.clone()),
        ));
        let listener1 = builder1.listen(None).await.unwrap();

        // Try to create second listener with the same address and no SO_REUSEPORT
        // This should fail with "address already in use"
        let mut builder2 = ListenerEndpoint::builder();
        builder2.listen_addr(ServerAddress::Tcp(addr.into(), Some(sock_opt_no_reuseport)));
        let result = builder2.listen(None).await;

        // The second bind should fail
        assert!(result.is_err());
        let error_msg = format!("{:?}", result.unwrap_err());
        assert!(
            error_msg.contains("address")
                || error_msg.contains("in use")
                || error_msg.contains("bind")
        );

        // Verify the first listener still works
        assert_eq!(listener1.as_str(), addr);
    }

    #[cfg(feature = "connection_filter")]
    #[tokio::test]
    async fn test_connection_filter_accept() {
        use crate::listeners::ConnectionFilter;
        use async_trait::async_trait;
        use std::sync::atomic::{AtomicUsize, Ordering};

        #[derive(Debug)]
        struct CountingFilter {
            accept_count: Arc<AtomicUsize>,
            reject_count: Arc<AtomicUsize>,
        }

        #[async_trait]
        impl ConnectionFilter for CountingFilter {
            async fn should_accept(&self, _addr: Option<&SocketAddr>) -> bool {
                let count = self.accept_count.fetch_add(1, Ordering::SeqCst);
                if count % 2 == 0 {
                    true
                } else {
                    self.reject_count.fetch_add(1, Ordering::SeqCst);
                    false
                }
            }
        }

        let addr = "127.0.0.1:7300";
        let accept_count = Arc::new(AtomicUsize::new(0));
        let reject_count = Arc::new(AtomicUsize::new(0));

        let filter = Arc::new(CountingFilter {
            accept_count: accept_count.clone(),
            reject_count: reject_count.clone(),
        });

        let mut builder = ListenerEndpoint::builder();
        builder
            .listen_addr(ServerAddress::Tcp(addr.into(), None))
            .connection_filter(filter);

        #[cfg(unix)]
        let listener = builder.listen(None).await.unwrap();
        #[cfg(windows)]
        let listener = builder.listen().await.unwrap();

        let listener_clone = listener.clone();
        tokio::spawn(async move {
            let _stream1 = listener_clone.accept().await.unwrap();
            let _stream2 = listener_clone.accept().await.unwrap();
        });

        tokio::time::sleep(Duration::from_millis(10)).await;

        let _conn1 = tokio::net::TcpStream::connect(addr).await.unwrap();
        let _conn2 = tokio::net::TcpStream::connect(addr).await.unwrap();
        let _conn3 = tokio::net::TcpStream::connect(addr).await.unwrap();

        tokio::time::sleep(Duration::from_millis(50)).await;

        assert_eq!(accept_count.load(Ordering::SeqCst), 3);
        assert_eq!(reject_count.load(Ordering::SeqCst), 1);
    }

    #[cfg(feature = "connection_filter")]
    #[tokio::test]
    async fn test_connection_filter_blocks_all() {
        use crate::listeners::ConnectionFilter;
        use async_trait::async_trait;
        use std::sync::atomic::{AtomicUsize, Ordering};

        #[derive(Debug)]
        struct RejectAllFilter {
            reject_count: Arc<AtomicUsize>,
        }

        #[async_trait]
        impl ConnectionFilter for RejectAllFilter {
            async fn should_accept(&self, _addr: Option<&SocketAddr>) -> bool {
                self.reject_count.fetch_add(1, Ordering::SeqCst);
                false
            }
        }

        let addr = "127.0.0.1:7301";
        let reject_count = Arc::new(AtomicUsize::new(0));

        let mut builder = ListenerEndpoint::builder();
        builder
            .listen_addr(ServerAddress::Tcp(addr.into(), None))
            .connection_filter(Arc::new(RejectAllFilter {
                reject_count: reject_count.clone(),
            }));

        #[cfg(unix)]
        let listener = builder.listen(None).await.unwrap();
        #[cfg(windows)]
        let listener = builder.listen().await.unwrap();

        let listener_clone = listener.clone();
        let _accept_handle = tokio::spawn(async move {
            // This will never return since all connections are rejected
            let _ = listener_clone.accept().await;
        });

        tokio::time::sleep(Duration::from_millis(50)).await;

        let mut handles = vec![];
        for _ in 0..3 {
            let handle = tokio::spawn(async move {
                if let Ok(stream) = tokio::net::TcpStream::connect(addr).await {
                    drop(stream);
                }
            });
            handles.push(handle);
        }

        for handle in handles {
            let _ = handle.await;
        }

        // Wait for rejections to be counted with timeout
        let start = tokio::time::Instant::now();
        let timeout = Duration::from_secs(2);

        loop {
            let rejected = reject_count.load(Ordering::SeqCst);
            if rejected >= 3 {
                assert_eq!(rejected, 3, "Should reject exactly 3 connections");
                break;
            }

            if start.elapsed() > timeout {
                panic!(
                    "Timeout waiting for rejections, got {} expected 3",
                    rejected
                );
            }

            tokio::time::sleep(Duration::from_millis(10)).await;
        }
    }
}


================================================
FILE: pingora-core/src/listeners/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The listening endpoints (TCP and TLS) and their configurations.
//!
//! This module provides the infrastructure for setting up network listeners
//! that accept incoming connections. It supports TCP, Unix domain sockets,
//! and TLS endpoints.
//!
//! # Connection Filtering
//!
//! With the `connection_filter` feature enabled, this module also provides
//! early connection filtering capabilities through the [`ConnectionFilter`] trait.
//! This allows dropping unwanted connections at the TCP level before any
//! expensive operations like TLS handshakes.
//!
//! ## Example with Connection Filtering
//!
//! ```rust,no_run
//! # #[cfg(feature = "connection_filter")]
//! # {
//! use pingora_core::listeners::{Listeners, ConnectionFilter};
//! use std::sync::Arc;
//!
//! // Create a custom filter
//! let filter = Arc::new(MyCustomFilter::new());
//!
//! // Apply to listeners
//! let mut listeners = Listeners::new();
//! listeners.set_connection_filter(filter);
//! listeners.add_tcp("0.0.0.0:8080");
//! # }
//! ```

mod l4;

#[cfg(feature = "connection_filter")]
pub mod connection_filter;

#[cfg(feature = "connection_filter")]
pub use connection_filter::{AcceptAllFilter, ConnectionFilter};

#[cfg(not(feature = "connection_filter"))]
#[derive(Debug, Clone)]
pub struct AcceptAllFilter;

#[cfg(not(feature = "connection_filter"))]
pub trait ConnectionFilter: std::fmt::Debug + Send + Sync {
    fn should_accept(&self, _addr: &std::net::SocketAddr) -> bool {
        true
    }
}

#[cfg(not(feature = "connection_filter"))]
impl ConnectionFilter for AcceptAllFilter {
    fn should_accept(&self, _addr: &std::net::SocketAddr) -> bool {
        true
    }
}
#[cfg(feature = "any_tls")]
pub mod tls;

#[cfg(not(feature = "any_tls"))]
pub use crate::tls::listeners as tls;

use crate::protocols::{l4::socket::SocketAddr, tls::TlsRef, Stream};

#[cfg(unix)]
use crate::server::ListenFds;

use async_trait::async_trait;
use pingora_error::Result;
use std::{any::Any, fs::Permissions, sync::Arc};

use l4::{ListenerEndpoint, Stream as L4Stream};
use tls::{Acceptor, TlsSettings};

pub use crate::protocols::tls::ALPN;
use crate::protocols::GetSocketDigest;
pub use l4::{ServerAddress, TcpSocketOptions};

/// The APIs to customize things like certificate during TLS server side handshake
#[async_trait]
pub trait TlsAccept {
    // TODO: return error?
    /// This function is called in the middle of a TLS handshake. Structs who
    /// implement this function should provide tls certificate and key to the
    /// [TlsRef] via `ssl_use_certificate` and `ssl_use_private_key`.
    /// Note. This is only supported for openssl and boringssl
    async fn certificate_callback(&self, _ssl: &mut TlsRef) -> () {
        // does nothing by default
    }

    /// This function is called after the TLS handshake is complete.
    ///
    /// Any value returned from this function (other than `None`) will be stored in the
    /// `extension` field of `SslDigest`. This allows you to attach custom application-specific
    /// data to the TLS connection, which will be accessible from the HTTP layer via the
    /// `SslDigest` attached to the session digest.
    async fn handshake_complete_callback(
        &self,
        _ssl: &TlsRef,
    ) -> Option<Arc<dyn Any + Send + Sync>> {
        None
    }
}

pub type TlsAcceptCallbacks = Box<dyn TlsAccept + Send + Sync>;

struct TransportStackBuilder {
    l4: ServerAddress,
    tls: Option<TlsSettings>,
    #[cfg(feature = "connection_filter")]
    connection_filter: Option<Arc<dyn ConnectionFilter>>,
}

impl TransportStackBuilder {
    pub async fn build(
        &mut self,
        #[cfg(unix)] upgrade_listeners: Option<ListenFds>,
    ) -> Result<TransportStack> {
        let mut builder = ListenerEndpoint::builder();

        builder.listen_addr(self.l4.clone());

        #[cfg(feature = "connection_filter")]
        if let Some(filter) = &self.connection_filter {
            builder.connection_filter(filter.clone());
        }

        #[cfg(unix)]
        let l4 = builder.listen(upgrade_listeners).await?;

        #[cfg(windows)]
        let l4 = builder.listen().await?;

        Ok(TransportStack {
            l4,
            tls: self.tls.take().map(|tls| Arc::new(tls.build())),
        })
    }
}

#[derive(Clone)]
pub(crate) struct TransportStack {
    l4: ListenerEndpoint,
    tls: Option<Arc<Acceptor>>,
}

impl TransportStack {
    pub fn as_str(&self) -> &str {
        self.l4.as_str()
    }

    pub async fn accept(&self) -> Result<UninitializedStream> {
        let stream = self.l4.accept().await?;
        Ok(UninitializedStream {
            l4: stream,
            tls: self.tls.clone(),
        })
    }

    pub fn cleanup(&mut self) {
        // placeholder
    }
}

pub(crate) struct UninitializedStream {
    l4: L4Stream,
    tls: Option<Arc<Acceptor>>,
}

impl UninitializedStream {
    pub async fn handshake(mut self) -> Result<Stream> {
        self.l4.set_buffer();
        if let Some(tls) = self.tls {
            let tls_stream = tls.tls_handshake(self.l4).await?;
            Ok(Box::new(tls_stream))
        } else {
            Ok(Box::new(self.l4))
        }
    }

    /// Get the peer address of the connection if available
    pub fn peer_addr(&self) -> Option<SocketAddr> {
        self.l4
            .get_socket_digest()
            .and_then(|d| d.peer_addr().cloned())
    }
}

/// The struct to hold one more multiple listening endpoints
pub struct Listeners {
    stacks: Vec<TransportStackBuilder>,
    #[cfg(feature = "connection_filter")]
    connection_filter: Option<Arc<dyn ConnectionFilter>>,
}

impl Listeners {
    /// Create a new [`Listeners`] with no listening endpoints.
    pub fn new() -> Self {
        Listeners {
            stacks: vec![],
            #[cfg(feature = "connection_filter")]
            connection_filter: None,
        }
    }
    /// Create a new [`Listeners`] with a TCP server endpoint from the given string.
    pub fn tcp(addr: &str) -> Self {
        let mut listeners = Self::new();
        listeners.add_tcp(addr);
        listeners
    }

    /// Create a new [`Listeners`] with a Unix domain socket endpoint from the given string.
    #[cfg(unix)]
    pub fn uds(addr: &str, perm: Option<Permissions>) -> Self {
        let mut listeners = Self::new();
        listeners.add_uds(addr, perm);
        listeners
    }

    /// Create a new [`Listeners`] with a TLS (TCP) endpoint with the given address string,
    /// and path to the certificate/private key pairs.
    /// This endpoint will adopt the [Mozilla Intermediate](https://wiki.mozilla.org/Security/Server_Side_TLS#Intermediate_compatibility_.28recommended.29)
    /// server side TLS settings.
    pub fn tls(addr: &str, cert_path: &str, key_path: &str) -> Result<Self> {
        let mut listeners = Self::new();
        listeners.add_tls(addr, cert_path, key_path)?;
        Ok(listeners)
    }

    /// Add a TCP endpoint to `self`.
    pub fn add_tcp(&mut self, addr: &str) {
        self.add_address(ServerAddress::Tcp(addr.into(), None));
    }

    /// Add a TCP endpoint to `self`, with the given [`TcpSocketOptions`].
    pub fn add_tcp_with_settings(&mut self, addr: &str, sock_opt: TcpSocketOptions) {
        self.add_address(ServerAddress::Tcp(addr.into(), Some(sock_opt)));
    }

    /// Add a Unix domain socket endpoint to `self`.
    #[cfg(unix)]
    pub fn add_uds(&mut self, addr: &str, perm: Option<Permissions>) {
        self.add_address(ServerAddress::Uds(addr.into(), perm));
    }

    /// Add a TLS endpoint to `self` with the [Mozilla Intermediate](https://wiki.mozilla.org/Security/Server_Side_TLS#Intermediate_compatibility_.28recommended.29)
    /// server side TLS settings.
    pub fn add_tls(&mut self, addr: &str, cert_path: &str, key_path: &str) -> Result<()> {
        self.add_tls_with_settings(addr, None, TlsSettings::intermediate(cert_path, key_path)?);
        Ok(())
    }

    /// Add a TLS endpoint to `self` with the given socket and server side TLS settings.
    /// See [`TlsSettings`] and [`TcpSocketOptions`] for more details.
    pub fn add_tls_with_settings(
        &mut self,
        addr: &str,
        sock_opt: Option<TcpSocketOptions>,
        settings: TlsSettings,
    ) {
        self.add_endpoint(ServerAddress::Tcp(addr.into(), sock_opt), Some(settings));
    }

    /// Add the given [`ServerAddress`] to `self`.
    pub fn add_address(&mut self, addr: ServerAddress) {
        self.add_endpoint(addr, None);
    }

    /// Set a connection filter for all endpoints in this listener collection
    #[cfg(feature = "connection_filter")]
    pub fn set_connection_filter(&mut self, filter: Arc<dyn ConnectionFilter>) {
        log::debug!("Setting connection filter on Listeners");

        // Store the filter for future endpoints
        self.connection_filter = Some(filter.clone());

        // Apply to existing stacks
        for stack in &mut self.stacks {
            stack.connection_filter = Some(filter.clone());
        }
    }

    /// Add the given [`ServerAddress`] to `self` with the given [`TlsSettings`] if provided
    pub fn add_endpoint(&mut self, l4: ServerAddress, tls: Option<TlsSettings>) {
        self.stacks.push(TransportStackBuilder {
            l4,
            tls,
            #[cfg(feature = "connection_filter")]
            connection_filter: self.connection_filter.clone(),
        })
    }

    pub(crate) async fn build(
        &mut self,
        #[cfg(unix)] upgrade_listeners: Option<ListenFds>,
    ) -> Result<Vec<TransportStack>> {
        let mut stacks = Vec::with_capacity(self.stacks.len());

        for b in self.stacks.iter_mut() {
            let new_stack = b
                .build(
                    #[cfg(unix)]
                    upgrade_listeners.clone(),
                )
                .await?;

            stacks.push(new_stack);
        }

        Ok(stacks)
    }

    pub(crate) fn cleanup(&self) {
        // placeholder
    }
}

#[cfg(test)]
mod test {
    use super::*;
    #[cfg(feature = "connection_filter")]
    use std::sync::atomic::{AtomicUsize, Ordering};
    #[cfg(feature = "any_tls")]
    use tokio::io::AsyncWriteExt;
    use tokio::net::TcpStream;
    use tokio::time::{sleep, Duration};

    #[tokio::test]
    async fn test_listen_tcp() {
        let addr1 = "127.0.0.1:7101";
        let addr2 = "127.0.0.1:7102";
        let mut listeners = Listeners::tcp(addr1);
        listeners.add_tcp(addr2);

        let listeners = listeners
            .build(
                #[cfg(unix)]
                None,
            )
            .await
            .unwrap();

        assert_eq!(listeners.len(), 2);
        for listener in listeners {
            tokio::spawn(async move {
                // just try to accept once
                let stream = listener.accept().await.unwrap();
                stream.handshake().await.unwrap();
            });
        }

        // make sure the above starts before the lines below
        sleep(Duration::from_millis(10)).await;

        TcpStream::connect(addr1).await.unwrap();
        TcpStream::connect(addr2).await.unwrap();
    }

    #[tokio::test]
    #[cfg(feature = "any_tls")]
    async fn test_listen_tls() {
        use tokio::io::AsyncReadExt;

        let addr = "127.0.0.1:7103";
        let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
        let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));
        let mut listeners = Listeners::tls(addr, &cert_path, &key_path).unwrap();
        let listener = listeners
            .build(
                #[cfg(unix)]
                None,
            )
            .await
            .unwrap()
            .pop()
            .unwrap();

        tokio::spawn(async move {
            // just try to accept once
            let stream = listener.accept().await.unwrap();
            let mut stream = stream.handshake().await.unwrap();
            let mut buf = [0; 1024];
            let _ = stream.read(&mut buf).await.unwrap();
            stream
                .write_all(b"HTTP/1.1 200 OK\r\nContent-Length: 1\r\n\r\na")
                .await
                .unwrap();
        });
        // make sure the above starts before the lines below
        sleep(Duration::from_millis(10)).await;

        let client = reqwest::Client::builder()
            .danger_accept_invalid_certs(true)
            .build()
            .unwrap();

        let res = client.get(format!("https://{addr}")).send().await.unwrap();
        assert_eq!(res.status(), reqwest::StatusCode::OK);
    }

    #[cfg(feature = "connection_filter")]
    #[test]
    fn test_connection_filter_inheritance() {
        #[derive(Debug, Clone)]
        struct TestFilter {
            counter: Arc<AtomicUsize>,
        }

        #[async_trait]
        impl ConnectionFilter for TestFilter {
            async fn should_accept(&self, _addr: Option<&std::net::SocketAddr>) -> bool {
                self.counter.fetch_add(1, Ordering::SeqCst);
                true
            }
        }

        let mut listeners = Listeners::new();

        // Add an endpoint before setting filter
        listeners.add_tcp("127.0.0.1:7104");

        // Set the connection filter
        let filter = Arc::new(TestFilter {
            counter: Arc::new(AtomicUsize::new(0)),
        });
        listeners.set_connection_filter(filter.clone());

        // Add endpoints after setting filter
        listeners.add_tcp("127.0.0.1:7105");
        #[cfg(feature = "any_tls")]
        {
            // Only test TLS if the feature is enabled
            if let Ok(tls_settings) = TlsSettings::intermediate(
                &format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR")),
                &format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR")),
            ) {
                listeners.add_tls_with_settings("127.0.0.1:7106", None, tls_settings);
            }
        }

        // Verify all stacks have the filter (only when feature is enabled)
        for stack in &listeners.stacks {
            assert!(
                stack.connection_filter.is_some(),
                "All stacks should have the connection filter set"
            );
        }
    }
}


================================================
FILE: pingora-core/src/listeners/tls/boringssl_openssl/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use log::debug;
use pingora_error::{ErrorType, OrErr, Result};
use std::ops::{Deref, DerefMut};

use crate::listeners::tls::boringssl_openssl::alpn::valid_alpn;
pub use crate::protocols::tls::ALPN;
use crate::protocols::IO;
use crate::tls::ssl::AlpnError;
use crate::tls::ssl::{SslAcceptor, SslAcceptorBuilder, SslFiletype, SslMethod};
use crate::{
    listeners::TlsAcceptCallbacks,
    protocols::tls::{
        server::{handshake, handshake_with_callback},
        SslStream,
    },
};
pub const TLS_CONF_ERR: ErrorType = ErrorType::Custom("TLSConfigError");

pub(crate) struct Acceptor {
    ssl_acceptor: SslAcceptor,
    callbacks: Option<TlsAcceptCallbacks>,
}

/// The TLS settings of a listening endpoint
pub struct TlsSettings {
    accept_builder: SslAcceptorBuilder,
    callbacks: Option<TlsAcceptCallbacks>,
}

impl From<SslAcceptorBuilder> for TlsSettings {
    fn from(settings: SslAcceptorBuilder) -> Self {
        TlsSettings {
            accept_builder: settings,
            callbacks: None,
        }
    }
}

impl Deref for TlsSettings {
    type Target = SslAcceptorBuilder;

    fn deref(&self) -> &Self::Target {
        &self.accept_builder
    }
}

impl DerefMut for TlsSettings {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.accept_builder
    }
}

impl TlsSettings {
    /// Create a new [`TlsSettings`] with the [Mozilla Intermediate](https://wiki.mozilla.org/Security/Server_Side_TLS#Intermediate_compatibility_.28recommended.29)
    /// server side TLS settings. Users can adjust the TLS settings after this object is created.
    /// Return error if the provided certificate and private key are invalid or not found.
    pub fn intermediate(cert_path: &str, key_path: &str) -> Result<Self> {
        let mut accept_builder = SslAcceptor::mozilla_intermediate_v5(SslMethod::tls()).or_err(
            TLS_CONF_ERR,
            "fail to create mozilla_intermediate_v5 Acceptor",
        )?;
        accept_builder
            .set_private_key_file(key_path, SslFiletype::PEM)
            .or_err_with(TLS_CONF_ERR, || format!("fail to read key file {key_path}"))?;
        accept_builder
            .set_certificate_chain_file(cert_path)
            .or_err_with(TLS_CONF_ERR, || {
                format!("fail to read cert file {cert_path}")
            })?;
        Ok(TlsSettings {
            accept_builder,
            callbacks: None,
        })
    }

    /// Create a new [`TlsSettings`] similar to [TlsSettings::intermediate()]. A struct that implements [TlsAcceptCallbacks]
    /// is needed to provide the certificate during the TLS handshake.
    pub fn with_callbacks(callbacks: TlsAcceptCallbacks) -> Result<Self> {
        let accept_builder = SslAcceptor::mozilla_intermediate_v5(SslMethod::tls()).or_err(
            TLS_CONF_ERR,
            "fail to create mozilla_intermediate_v5 Acceptor",
        )?;
        Ok(TlsSettings {
            accept_builder,
            callbacks: Some(callbacks),
        })
    }

    /// Enable HTTP/2 support for this endpoint, which is default off.
    /// This effectively sets the ALPN to prefer HTTP/2 with HTTP/1.1 allowed
    pub fn enable_h2(&mut self) {
        self.set_alpn(ALPN::H2H1);
    }

    /// Set the ALPN preference of this endpoint. See [`ALPN`] for more details
    pub fn set_alpn(&mut self, alpn: ALPN) {
        match alpn {
            ALPN::H2H1 => self
                .accept_builder
                .set_alpn_select_callback(alpn::prefer_h2),
            ALPN::H1 => self.accept_builder.set_alpn_select_callback(alpn::h1_only),
            ALPN::H2 => self.accept_builder.set_alpn_select_callback(alpn::h2_only),
            ALPN::Custom(custom) => {
                self.accept_builder
                    .set_alpn_select_callback(move |_, alpn_in| {
                        if !valid_alpn(alpn_in) {
                            return Err(AlpnError::NOACK);
                        }
                        match alpn::select_protocol(alpn_in, custom.protocol()) {
                            Some(p) => Ok(p),
                            None => Err(AlpnError::NOACK),
                        }
                    });
            }
        }
    }

    pub(crate) fn build(self) -> Acceptor {
        Acceptor {
            ssl_acceptor: self.accept_builder.build(),
            callbacks: self.callbacks,
        }
    }
}

impl Acceptor {
    pub async fn tls_handshake<S: IO>(&self, stream: S) -> Result<SslStream<S>> {
        debug!("new ssl session");
        // TODO: be able to offload this handshake in a thread pool
        if let Some(cb) = self.callbacks.as_ref() {
            handshake_with_callback(&self.ssl_acceptor, stream, cb).await
        } else {
            handshake(&self.ssl_acceptor, stream).await
        }
    }
}

mod alpn {
    use super::*;
    use crate::tls::ssl::{select_next_proto, AlpnError, SslRef};

    pub(super) fn valid_alpn(alpn_in: &[u8]) -> bool {
        if alpn_in.is_empty() {
            return false;
        }
        // TODO: can add more thorough validation here.
        true
    }

    /// Finds the first protocol in the client-offered ALPN list that matches the given protocol.
    ///
    /// This is a helper for ALPN negotiation. It iterates over the client's protocol list
    /// (in wire format) and returns the first protocol that matches proto
    /// The returned reference always points into `client_protocols`, so lifetimes are correct.
    pub(super) fn select_protocol<'a>(
        client_protocols: &'a [u8],
        proto: &[u8],
    ) -> Option<&'a [u8]> {
        let mut bytes = client_protocols;
        while !bytes.is_empty() {
            let len = bytes[0] as usize;
            bytes = &bytes[1..];
            if len == proto.len() && &bytes[..len] == proto {
                return Some(&bytes[..len]);
            }
            bytes = &bytes[len..];
        }
        None
    }

    // A standard implementation provided by the SSL lib is used below

    pub fn prefer_h2<'a>(_ssl: &mut SslRef, alpn_in: &'a [u8]) -> Result<&'a [u8], AlpnError> {
        if !valid_alpn(alpn_in) {
            return Err(AlpnError::NOACK);
        }
        match select_next_proto(ALPN::H2H1.to_wire_preference(), alpn_in) {
            Some(p) => Ok(p),
            _ => Err(AlpnError::NOACK), // unknown ALPN, just ignore it. Most clients will fallback to h1
        }
    }

    pub fn h1_only<'a>(_ssl: &mut SslRef, alpn_in: &'a [u8]) -> Result<&'a [u8], AlpnError> {
        if !valid_alpn(alpn_in) {
            return Err(AlpnError::NOACK);
        }
        match select_next_proto(ALPN::H1.to_wire_preference(), alpn_in) {
            Some(p) => Ok(p),
            _ => Err(AlpnError::NOACK), // unknown ALPN, just ignore it. Most clients will fallback to h1
        }
    }

    pub fn h2_only<'a>(_ssl: &mut SslRef, alpn_in: &'a [u8]) -> Result<&'a [u8], AlpnError> {
        if !valid_alpn(alpn_in) {
            return Err(AlpnError::ALERT_FATAL);
        }
        match select_next_proto(ALPN::H2.to_wire_preference(), alpn_in) {
            Some(p) => Ok(p),
            _ => Err(AlpnError::ALERT_FATAL), // cannot agree
        }
    }
}


================================================
FILE: pingora-core/src/listeners/tls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(feature = "openssl_derived")]
mod boringssl_openssl;

#[cfg(feature = "openssl_derived")]
pub use boringssl_openssl::*;

#[cfg(feature = "rustls")]
mod rustls;

#[cfg(feature = "rustls")]
pub use rustls::*;

#[cfg(feature = "s2n")]
mod s2n;

#[cfg(feature = "s2n")]
pub use s2n::*;


================================================
FILE: pingora-core/src/listeners/tls/rustls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use crate::listeners::TlsAcceptCallbacks;
use crate::protocols::tls::{server::handshake, server::handshake_with_callback, TlsStream};
use log::debug;
use pingora_error::ErrorType::InternalError;
use pingora_error::{Error, OrErr, Result};
use pingora_rustls::load_certs_and_key_files;
use pingora_rustls::ClientCertVerifier;
use pingora_rustls::ServerConfig;
use pingora_rustls::{version, TlsAcceptor as RusTlsAcceptor};

use crate::protocols::{ALPN, IO};

/// The TLS settings of a listening endpoint
pub struct TlsSettings {
    alpn_protocols: Option<Vec<Vec<u8>>>,
    cert_path: String,
    key_path: String,
    client_cert_verifier: Option<Arc<dyn ClientCertVerifier>>,
}

pub struct Acceptor {
    pub acceptor: RusTlsAcceptor,
    callbacks: Option<TlsAcceptCallbacks>,
}

impl TlsSettings {
    /// Create a Rustls acceptor based on the current setting for certificates,
    /// keys, and protocols.
    ///
    /// _NOTE_ This function will panic if there is an error in loading
    /// certificate files or constructing the builder
    ///
    /// Todo: Return a result instead of panicking XD
    pub fn build(self) -> Acceptor {
        let Ok(Some((certs, key))) = load_certs_and_key_files(&self.cert_path, &self.key_path)
        else {
            panic!(
                "Failed to load provided certificates \"{}\" or key \"{}\".",
                self.cert_path, self.key_path
            )
        };

        let builder =
            ServerConfig::builder_with_protocol_versions(&[&version::TLS12, &version::TLS13]);
        let builder = if let Some(verifier) = self.client_cert_verifier {
            builder.with_client_cert_verifier(verifier)
        } else {
            builder.with_no_client_auth()
        };
        let mut config = builder
            .with_single_cert(certs, key)
            .explain_err(InternalError, |e| {
                format!("Failed to create server listener config: {e}")
            })
            .unwrap();

        if let Some(alpn_protocols) = self.alpn_protocols {
            config.alpn_protocols = alpn_protocols;
        }

        Acceptor {
            acceptor: RusTlsAcceptor::from(Arc::new(config)),
            callbacks: None,
        }
    }

    /// Enable HTTP/2 support for this endpoint, which is default off.
    /// This effectively sets the ALPN to prefer HTTP/2 with HTTP/1.1 allowed
    pub fn enable_h2(&mut self) {
        self.set_alpn(ALPN::H2H1);
    }

    pub fn set_alpn(&mut self, alpn: ALPN) {
        self.alpn_protocols = Some(alpn.to_wire_protocols());
    }

    /// Configure mTLS by providing a rustls client certificate verifier.
    pub fn set_client_cert_verifier(&mut self, verifier: Arc<dyn ClientCertVerifier>) {
        self.client_cert_verifier = Some(verifier);
    }

    pub fn intermediate(cert_path: &str, key_path: &str) -> Result<Self>
    where
        Self: Sized,
    {
        Ok(TlsSettings {
            alpn_protocols: None,
            cert_path: cert_path.to_string(),
            key_path: key_path.to_string(),
            client_cert_verifier: None,
        })
    }

    pub fn with_callbacks() -> Result<Self>
    where
        Self: Sized,
    {
        // TODO: verify if/how callback in handshake can be done using Rustls
        Error::e_explain(
            InternalError,
            "Certificate callbacks are not supported with feature \"rustls\".",
        )
    }
}

impl Acceptor {
    pub async fn tls_handshake<S: IO>(&self, stream: S) -> Result<TlsStream<S>> {
        debug!("new tls session");
        // TODO: be able to offload this handshake in a thread pool
        if let Some(cb) = self.callbacks.as_ref() {
            handshake_with_callback(self, stream, cb).await
        } else {
            handshake(self, stream).await
        }
    }
}


================================================
FILE: pingora-core/src/listeners/tls/s2n/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::sync::Arc;

use log::debug;
use pingora_error::Result;
use pingora_s2n::{
    load_certs_and_key_files, ClientAuthType, Config, IgnoreVerifyHostnameCallback, S2NPolicy,
    TlsAcceptor, DEFAULT_TLS13,
};

use crate::protocols::tls::server::handshake;
use crate::protocols::tls::{CaType, PskConfig, PskType, S2NConnectionBuilder, TlsStream};
use crate::protocols::{ALPN, IO};

/// The TLS settings of a listening endpoint
pub struct TlsSettings {
    cert_path: Option<String>,
    key_path: Option<String>,
    ca: Option<CaType>,
    alpn: Option<ALPN>,
    psk_config: Option<Arc<PskType>>,
    security_policy: Option<S2NPolicy>,
    client_auth_required: bool,
    verify_client_hostname: bool,
    max_blinding_delay: Option<u32>,
}

pub struct Acceptor {
    pub acceptor: TlsAcceptor<S2NConnectionBuilder>,
}

impl TlsSettings {
    pub fn build(self) -> Acceptor {
        let mut builder = Config::builder();

        // Default security policy with TLS 1.3 support
        // https://aws.github.io/s2n-tls/usage-guide/ch06-security-policies.html
        let policy = self.security_policy.unwrap_or(DEFAULT_TLS13);

        if let Some(max_blinding_delay) = self.max_blinding_delay {
            builder.set_max_blinding_delay(max_blinding_delay).unwrap();
        }

        if self.client_auth_required {
            builder
                .set_client_auth_type(ClientAuthType::Required)
                .unwrap();
        }

        if let Some(alpn) = self.alpn {
            builder
                .set_application_protocol_preference(alpn.to_wire_protocols())
                .unwrap();
        }

        if let (Some(cert_path), Some(key_path)) = (self.cert_path, self.key_path) {
            let Ok((cert, key)) = load_certs_and_key_files(&cert_path, &key_path) else {
                panic!(
                    "Failed to load provided certificates \"{}\" or key \"{}\".",
                    cert_path, key_path
                )
            };

            builder.load_pem(&cert, &key).unwrap();
        }

        if let Some(ca) = self.ca {
            builder.trust_pem(&ca.raw_pem).expect("invalid ca pem");
        }

        if !self.verify_client_hostname {
            builder
                .set_verify_host_callback(IgnoreVerifyHostnameCallback::new())
                .unwrap();
        }

        let config = builder.build().unwrap();
        let connection_builder = S2NConnectionBuilder {
            config: config,
            psk_config: self.psk_config.clone(),
            security_policy: Some(policy.clone()),
        };

        Acceptor {
            acceptor: TlsAcceptor::new(connection_builder),
        }
    }

    /// Enable HTTP/2 support for this endpoint, which is default off.
    /// This effectively sets the ALPN to prefer HTTP/2 with HTTP/1.1 allowed
    pub fn enable_h2(&mut self) {
        self.set_alpn(ALPN::H2H1);
    }

    fn set_alpn(&mut self, alpn: ALPN) {
        self.alpn = Some(alpn);
    }

    /// Configure CA to use for mTLS
    pub fn set_ca(&mut self, ca: CaType) {
        self.ca = Some(ca);
    }

    /// Configure pre-shared keys to use for TLS-PSK handshake
    /// https://datatracker.ietf.org/doc/html/rfc4279
    pub fn set_psk_config(&mut self, psk_config: PskConfig) {
        self.psk_config = Some(Arc::new(psk_config));
    }

    /// S2N-TLS security policy to use. If not set, the default policy
    /// "default_tls13" will be used.
    /// https://aws.github.io/s2n-tls/usage-guide/ch06-security-policies.html
    pub fn set_policy(&mut self, policy: S2NPolicy) {
        self.security_policy = Some(policy);
    }

    /// The certificate and private key to use for TLS connections
    pub fn set_cert(&mut self, cert_path: &str, key_path: &str) {
        self.cert_path = Some(cert_path.to_string());
        self.key_path = Some(key_path.to_string());
    }

    /// Require client certificate authentication (mTLS)
    pub fn set_client_auth_required(&mut self, required: bool) {
        self.client_auth_required = required;
    }

    /// If validating client certificate, also verify client hostname (mTLS)
    pub fn set_verify_client_hostname(&mut self, verify: bool) {
        self.verify_client_hostname = verify;
    }

    /// S2N-TLS will delay a response up to the max blinding delay (default 30)
    /// seconds whenever an error triggered by a peer occurs to mitigate against
    /// timing side channels.
    pub fn set_max_blinding_delay(&mut self, delay: u32) {
        self.max_blinding_delay = Some(delay);
    }

    pub fn intermediate(cert_path: &str, key_path: &str) -> Result<Self>
    where
        Self: Sized,
    {
        Ok(TlsSettings {
            cert_path: Some(cert_path.to_string()),
            key_path: Some(key_path.to_string()),
            ca: None,
            security_policy: None,
            alpn: None,
            psk_config: None,
            client_auth_required: false,
            verify_client_hostname: false,
            max_blinding_delay: None,
        })
    }

    pub fn new() -> Self {
        TlsSettings {
            cert_path: None,
            key_path: None,
            ca: None,
            security_policy: None,
            alpn: None,
            psk_config: None,
            client_auth_required: false,
            verify_client_hostname: false,
            max_blinding_delay: None,
        }
    }
}

impl Acceptor {
    pub async fn tls_handshake<S: IO>(&self, stream: S) -> Result<TlsStream<S>> {
        debug!("new tls session");
        handshake(self, stream).await
    }
}


================================================
FILE: pingora-core/src/modules/http/compression.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP compression filter

use super::*;
use crate::protocols::http::compression::ResponseCompressionCtx;
use std::ops::{Deref, DerefMut};

/// HTTP response compression module
pub struct ResponseCompression(ResponseCompressionCtx);

impl Deref for ResponseCompression {
    type Target = ResponseCompressionCtx;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for ResponseCompression {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

#[async_trait]
impl HttpModule for ResponseCompression {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }
    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }

    async fn request_header_filter(&mut self, req: &mut RequestHeader) -> Result<()> {
        self.0.request_filter(req);
        Ok(())
    }

    async fn response_header_filter(
        &mut self,
        resp: &mut ResponseHeader,
        end_of_stream: bool,
    ) -> Result<()> {
        self.0.response_header_filter(resp, end_of_stream);
        Ok(())
    }

    fn response_body_filter(
        &mut self,
        body: &mut Option<Bytes>,
        end_of_stream: bool,
    ) -> Result<()> {
        if !self.0.is_enabled() {
            return Ok(());
        }
        let compressed = self.0.response_body_filter(body.as_ref(), end_of_stream);
        if compressed.is_some() {
            *body = compressed;
        }
        Ok(())
    }

    fn response_done_filter(&mut self) -> Result<Option<Bytes>> {
        if !self.0.is_enabled() {
            return Ok(None);
        }
        // Flush or finish any remaining encoded bytes upon HTTP response completion
        // (if it was not already ended in the body filter).
        Ok(self.0.response_body_filter(None, true))
    }
}

/// The builder for HTTP response compression module
pub struct ResponseCompressionBuilder {
    level: u32,
}

impl ResponseCompressionBuilder {
    /// Return a [ModuleBuilder] for [ResponseCompression] with the given compression level
    pub fn enable(level: u32) -> ModuleBuilder {
        Box::new(ResponseCompressionBuilder { level })
    }
}

impl HttpModuleBuilder for ResponseCompressionBuilder {
    fn init(&self) -> Module {
        Box::new(ResponseCompression(ResponseCompressionCtx::new(
            self.level, false, false,
        )))
    }

    fn order(&self) -> i16 {
        // run the response filter later than most others filters
        i16::MIN / 2
    }
}


================================================
FILE: pingora-core/src/modules/http/grpc_web.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::*;
use crate::protocols::http::bridge::grpc_web::GrpcWebCtx;
use std::ops::{Deref, DerefMut};

/// gRPC-web bridge module, this will convert
/// HTTP/1.1 gRPC-web requests to H2 gRPC requests
#[derive(Default)]
pub struct GrpcWebBridge(GrpcWebCtx);

impl Deref for GrpcWebBridge {
    type Target = GrpcWebCtx;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for GrpcWebBridge {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

#[async_trait]
impl HttpModule for GrpcWebBridge {
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
        self
    }

    async fn request_header_filter(&mut self, req: &mut RequestHeader) -> Result<()> {
        self.0.request_header_filter(req);
        Ok(())
    }

    async fn response_header_filter(
        &mut self,
        resp: &mut ResponseHeader,
        _end_of_stream: bool,
    ) -> Result<()> {
        self.0.response_header_filter(resp);
        Ok(())
    }

    fn response_trailer_filter(
        &mut self,
        trailers: &mut Option<Box<HeaderMap>>,
    ) -> Result<Option<Bytes>> {
        if let Some(trailers) = trailers {
            return self.0.response_trailer_filter(trailers);
        }
        Ok(None)
    }
}

/// The builder for gRPC-web bridge module
pub struct GrpcWeb;

impl HttpModuleBuilder for GrpcWeb {
    fn init(&self) -> Module {
        Box::new(GrpcWebBridge::default())
    }
}


================================================
FILE: pingora-core/src/modules/http/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Modules for HTTP traffic.
//!
//! [HttpModule]s define request and response filters to use while running an
//! [HttpServer](crate::apps::http_app::HttpServer)
//! application.
//! See the [ResponseCompression](crate::modules::http::compression::ResponseCompression)
//! module for an example of how to implement a basic module.

pub mod compression;
pub mod grpc_web;

use async_trait::async_trait;
use bytes::Bytes;
use http::HeaderMap;
use once_cell::sync::OnceCell;
use pingora_error::Result;
use pingora_http::{RequestHeader, ResponseHeader};
use std::any::Any;
use std::any::TypeId;
use std::collections::HashMap;
use std::sync::Arc;

/// The trait an HTTP traffic module needs to implement
#[async_trait]
pub trait HttpModule {
    async fn request_header_filter(&mut self, _req: &mut RequestHeader) -> Result<()> {
        Ok(())
    }

    async fn request_body_filter(
        &mut self,
        _body: &mut Option<Bytes>,
        _end_of_stream: bool,
    ) -> Result<()> {
        Ok(())
    }

    async fn response_header_filter(
        &mut self,
        _resp: &mut ResponseHeader,
        _end_of_stream: bool,
    ) -> Result<()> {
        Ok(())
    }

    fn response_body_filter(
        &mut self,
        _body: &mut Option<Bytes>,
        _end_of_stream: bool,
    ) -> Result<()> {
        Ok(())
    }

    fn response_trailer_filter(
        &mut self,
        _trailers: &mut Option<Box<HeaderMap>>,
    ) -> Result<Option<Bytes>> {
        Ok(None)
    }

    fn response_done_filter(&mut self) -> Result<Option<Bytes>> {
        Ok(None)
    }

    fn as_any(&self) -> &dyn Any;
    fn as_any_mut(&mut self) -> &mut dyn Any;
}

pub type Module = Box<dyn HttpModule + 'static + Send + Sync>;

/// Trait to init the http module ctx for each request
pub trait HttpModuleBuilder {
    /// The order the module will run
    ///
    /// The lower the value, the later it runs relative to other filters.
    /// If the order of the filter is not important, leave it to the default 0.
    fn order(&self) -> i16 {
        0
    }

    /// Initialize and return the per request module context
    fn init(&self) -> Module;
}

pub type ModuleBuilder = Box<dyn HttpModuleBuilder + 'static + Send + Sync>;

/// The object to hold multiple http modules
pub struct HttpModules {
    modules: Vec<ModuleBuilder>,
    module_index: OnceCell<Arc<HashMap<TypeId, usize>>>,
}

impl HttpModules {
    /// Create a new [HttpModules]
    pub fn new() -> Self {
        HttpModules {
            modules: vec![],
            module_index: OnceCell::new(),
        }
    }

    /// Add a new [ModuleBuilder] to [HttpModules]
    ///
    /// Each type of [HttpModule] can be only added once.
    /// # Panic
    /// Panic if any [HttpModule] is added more than once.
    pub fn add_module(&mut self, builder: ModuleBuilder) {
        if self.module_index.get().is_some() {
            // We use a shared module_index the index would be out of sync if we
            // add more modules.
            panic!("cannot add module after ctx is already built")
        }
        self.modules.push(builder);
        // not the most efficient way but should be fine
        // largest order first
        self.modules.sort_by_key(|m| -m.order());
    }

    /// Build the contexts of all the modules added to this [HttpModules]
    pub fn build_ctx(&self) -> HttpModuleCtx {
        let module_ctx: Vec<_> = self.modules.iter().map(|b| b.init()).collect();
        let module_index = self
            .module_index
            .get_or_init(|| {
                let mut module_index = HashMap::with_capacity(self.modules.len());
                for (i, c) in module_ctx.iter().enumerate() {
                    let exist = module_index.insert(c.as_any().type_id(), i);
                    if exist.is_some() {
                        panic!("duplicated filters found")
                    }
                }
                Arc::new(module_index)
            })
            .clone();

        HttpModuleCtx {
            module_ctx,
            module_index,
        }
    }
}

/// The Contexts of multiple modules
///
/// This is the object that will apply all the included modules to a certain HTTP request.
/// The modules are ordered according to their `order()`.
pub struct HttpModuleCtx {
    // the modules in the order of execution
    module_ctx: Vec<Module>,
    // find the module in the vec with its type ID
    module_index: Arc<HashMap<TypeId, usize>>,
}

impl HttpModuleCtx {
    /// Create a placeholder empty [HttpModuleCtx].
    ///
    /// [HttpModules] should be used to create nonempty [HttpModuleCtx].
    pub fn empty() -> Self {
        HttpModuleCtx {
            module_ctx: vec![],
            module_index: Arc::new(HashMap::new()),
        }
    }

    /// Get a ref to [HttpModule] if any.
    pub fn get<T: 'static>(&self) -> Option<&T> {
        let idx = self.module_index.get(&TypeId::of::<T>())?;
        let ctx = &self.module_ctx[*idx];
        Some(
            ctx.as_any()
                .downcast_ref::<T>()
                .expect("type should always match"),
        )
    }

    /// Get a mut ref to [HttpModule] if any.
    pub fn get_mut<T: 'static>(&mut self) -> Option<&mut T> {
        let idx = self.module_index.get(&TypeId::of::<T>())?;
        let ctx = &mut self.module_ctx[*idx];
        Some(
            ctx.as_any_mut()
                .downcast_mut::<T>()
                .expect("type should always match"),
        )
    }

    /// Run the `request_header_filter` for all the modules according to their orders.
    pub async fn request_header_filter(&mut self, req: &mut RequestHeader) -> Result<()> {
        for filter in self.module_ctx.iter_mut() {
            filter.request_header_filter(req).await?;
        }
        Ok(())
    }

    /// Run the `request_body_filter` for all the modules according to their orders.
    pub async fn request_body_filter(
        &mut self,
        body: &mut Option<Bytes>,
        end_of_stream: bool,
    ) -> Result<()> {
        for filter in self.module_ctx.iter_mut() {
            filter.request_body_filter(body, end_of_stream).await?;
        }
        Ok(())
    }

    /// Run the `response_header_filter` for all the modules according to their orders.
    pub async fn response_header_filter(
        &mut self,
        req: &mut ResponseHeader,
        end_of_stream: bool,
    ) -> Result<()> {
        for filter in self.module_ctx.iter_mut() {
            filter.response_header_filter(req, end_of_stream).await?;
        }
        Ok(())
    }

    /// Run the `response_body_filter` for all the modules according to their orders.
    pub fn response_body_filter(
        &mut self,
        body: &mut Option<Bytes>,
        end_of_stream: bool,
    ) -> Result<()> {
        for filter in self.module_ctx.iter_mut() {
            filter.response_body_filter(body, end_of_stream)?;
        }
        Ok(())
    }

    /// Run the `response_trailer_filter` for all the modules according to their orders.
    ///
    /// Returns an `Option<Bytes>` which can be used to write response trailers into
    /// the response body. Note, if multiple modules attempt to write trailers into
    /// the body the last one will be used.
    ///
    /// Implementors that intend to write trailers into the body need to ensure their filter
    /// is using an encoding that supports this.
    pub fn response_trailer_filter(
        &mut self,
        trailers: &mut Option<Box<HeaderMap>>,
    ) -> Result<Option<Bytes>> {
        let mut encoded = None;
        for filter in self.module_ctx.iter_mut() {
            if let Some(buf) = filter.response_trailer_filter(trailers)? {
                encoded = Some(buf);
            }
        }
        Ok(encoded)
    }

    /// Run the `response_done_filter` for all the modules according to their orders.
    ///
    /// This filter may be invoked in certain response paths to signal end of response
    /// if not already done so via trailers or body (with end flag set).
    ///
    /// Returns an `Option<Bytes>` which can be used to write additional response body
    /// bytes. Note, if multiple modules attempt to write body bytes, only the last one
    /// will be used.
    pub fn response_done_filter(&mut self) -> Result<Option<Bytes>> {
        let mut encoded = None;
        for filter in self.module_ctx.iter_mut() {
            if let Some(buf) = filter.response_done_filter()? {
                encoded = Some(buf);
            }
        }
        Ok(encoded)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    struct MyModule;
    #[async_trait]
    impl HttpModule for MyModule {
        fn as_any(&self) -> &dyn Any {
            self
        }
        fn as_any_mut(&mut self) -> &mut dyn Any {
            self
        }
        async fn request_header_filter(&mut self, req: &mut RequestHeader) -> Result<()> {
            req.insert_header("my-filter", "1")
        }
    }
    struct MyModuleBuilder;
    impl HttpModuleBuilder for MyModuleBuilder {
        fn order(&self) -> i16 {
            1
        }

        fn init(&self) -> Module {
            Box::new(MyModule)
        }
    }

    struct MyOtherModule;
    #[async_trait]
    impl HttpModule for MyOtherModule {
        fn as_any(&self) -> &dyn Any {
            self
        }
        fn as_any_mut(&mut self) -> &mut dyn Any {
            self
        }
        async fn request_header_filter(&mut self, req: &mut RequestHeader) -> Result<()> {
            if req.headers.get("my-filter").is_some() {
                // if this MyOtherModule runs after MyModule
                req.insert_header("my-filter", "2")
            } else {
                // if this MyOtherModule runs before MyModule
                req.insert_header("my-other-filter", "1")
            }
        }
    }
    struct MyOtherModuleBuilder;
    impl HttpModuleBuilder for MyOtherModuleBuilder {
        fn order(&self) -> i16 {
            -1
        }

        fn init(&self) -> Module {
            Box::new(MyOtherModule)
        }
    }

    #[test]
    fn test_module_get() {
        let mut http_module = HttpModules::new();
        http_module.add_module(Box::new(MyModuleBuilder));
        http_module.add_module(Box::new(MyOtherModuleBuilder));
        let mut ctx = http_module.build_ctx();
        assert!(ctx.get::<MyModule>().is_some());
        assert!(ctx.get::<MyOtherModule>().is_some());
        assert!(ctx.get::<usize>().is_none());
        assert!(ctx.get_mut::<MyModule>().is_some());
        assert!(ctx.get_mut::<MyOtherModule>().is_some());
        assert!(ctx.get_mut::<usize>().is_none());
    }

    #[tokio::test]
    async fn test_module_filter() {
        let mut http_module = HttpModules::new();
        http_module.add_module(Box::new(MyOtherModuleBuilder));
        http_module.add_module(Box::new(MyModuleBuilder));
        let mut ctx = http_module.build_ctx();
        let mut req = RequestHeader::build("Get", b"/", None).unwrap();
        ctx.request_header_filter(&mut req).await.unwrap();
        // MyModule runs before MyOtherModule
        assert_eq!(req.headers.get("my-filter").unwrap(), "2");
        assert!(req.headers.get("my-other-filter").is_none());
    }
}


================================================
FILE: pingora-core/src/modules/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Modules to extend the functionalities of pingora services.
pub mod http;


================================================
FILE: pingora-core/src/protocols/digest.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Extra information about the connection

use std::sync::Arc;
use std::time::{Duration, SystemTime};

use once_cell::sync::OnceCell;

use super::l4::ext::{get_original_dest, get_recv_buf, get_snd_buf, get_tcp_info, TCP_INFO};
use super::l4::socket::SocketAddr;
use super::raw_connect::ProxyDigest;
use super::tls::digest::SslDigest;

/// The information can be extracted from a connection
#[derive(Clone, Debug, Default)]
pub struct Digest {
    /// Information regarding the TLS of this connection if any
    pub ssl_digest: Option<Arc<SslDigest>>,
    /// Timing information
    pub timing_digest: Vec<Option<TimingDigest>>,
    /// information regarding the CONNECT proxy this connection uses.
    pub proxy_digest: Option<Arc<ProxyDigest>>,
    /// Information about underlying socket/fd of this connection
    pub socket_digest: Option<Arc<SocketDigest>>,
}

/// The interface to return protocol related information
pub trait ProtoDigest {
    fn get_digest(&self) -> Option<&Digest> {
        None
    }
}

/// The timing information of the connection
#[derive(Clone, Debug)]
pub struct TimingDigest {
    /// When this connection was established
    pub established_ts: SystemTime,
}

impl Default for TimingDigest {
    fn default() -> Self {
        TimingDigest {
            established_ts: SystemTime::UNIX_EPOCH,
        }
    }
}

#[derive(Debug)]
/// The interface to return socket-related information
pub struct SocketDigest {
    #[cfg(unix)]
    raw_fd: std::os::unix::io::RawFd,
    #[cfg(windows)]
    raw_sock: std::os::windows::io::RawSocket,
    /// Remote socket address
    pub peer_addr: OnceCell<Option<SocketAddr>>,
    /// Local socket address
    pub local_addr: OnceCell<Option<SocketAddr>>,
    /// Original destination address
    pub original_dst: OnceCell<Option<SocketAddr>>,
}

impl SocketDigest {
    #[cfg(unix)]
    pub fn from_raw_fd(raw_fd: std::os::unix::io::RawFd) -> SocketDigest {
        SocketDigest {
            raw_fd,
            peer_addr: OnceCell::new(),
            local_addr: OnceCell::new(),
            original_dst: OnceCell::new(),
        }
    }

    #[cfg(windows)]
    pub fn from_raw_socket(raw_sock: std::os::windows::io::RawSocket) -> SocketDigest {
        SocketDigest {
            raw_sock,
            peer_addr: OnceCell::new(),
            local_addr: OnceCell::new(),
            original_dst: OnceCell::new(),
        }
    }

    #[cfg(unix)]
    pub fn peer_addr(&self) -> Option<&SocketAddr> {
        self.peer_addr
            .get_or_init(|| SocketAddr::from_raw_fd(self.raw_fd, true))
            .as_ref()
    }

    #[cfg(windows)]
    pub fn peer_addr(&self) -> Option<&SocketAddr> {
        self.peer_addr
            .get_or_init(|| SocketAddr::from_raw_socket(self.raw_sock, true))
            .as_ref()
    }

    #[cfg(unix)]
    pub fn local_addr(&self) -> Option<&SocketAddr> {
        self.local_addr
            .get_or_init(|| SocketAddr::from_raw_fd(self.raw_fd, false))
            .as_ref()
    }

    #[cfg(windows)]
    pub fn local_addr(&self) -> Option<&SocketAddr> {
        self.local_addr
            .get_or_init(|| SocketAddr::from_raw_socket(self.raw_sock, false))
            .as_ref()
    }

    fn is_inet(&self) -> bool {
        self.local_addr().and_then(|p| p.as_inet()).is_some()
    }

    #[cfg(unix)]
    pub fn tcp_info(&self) -> Option<TCP_INFO> {
        if self.is_inet() {
            get_tcp_info(self.raw_fd).ok()
        } else {
            None
        }
    }

    #[cfg(windows)]
    pub fn tcp_info(&self) -> Option<TCP_INFO> {
        if self.is_inet() {
            get_tcp_info(self.raw_sock).ok()
        } else {
            None
        }
    }

    #[cfg(unix)]
    pub fn get_recv_buf(&self) -> Option<usize> {
        if self.is_inet() {
            get_recv_buf(self.raw_fd).ok()
        } else {
            None
        }
    }

    #[cfg(windows)]
    pub fn get_recv_buf(&self) -> Option<usize> {
        if self.is_inet() {
            get_recv_buf(self.raw_sock).ok()
        } else {
            None
        }
    }

    #[cfg(unix)]
    pub fn get_snd_buf(&self) -> Option<usize> {
        if self.is_inet() {
            get_snd_buf(self.raw_fd).ok()
        } else {
            None
        }
    }

    #[cfg(windows)]
    pub fn get_snd_buf(&self) -> Option<usize> {
        if self.is_inet() {
            get_snd_buf(self.raw_sock).ok()
        } else {
            None
        }
    }

    #[cfg(unix)]
    pub fn original_dst(&self) -> Option<&SocketAddr> {
        self.original_dst
            .get_or_init(|| {
                get_original_dest(self.raw_fd)
                    .ok()
                    .flatten()
                    .map(SocketAddr::Inet)
            })
            .as_ref()
    }

    #[cfg(windows)]
    pub fn original_dst(&self) -> Option<&SocketAddr> {
        self.original_dst
            .get_or_init(|| {
                get_original_dest(self.raw_sock)
                    .ok()
                    .flatten()
                    .map(SocketAddr::Inet)
            })
            .as_ref()
    }
}

/// The interface to return timing information
pub trait GetTimingDigest {
    /// Return the timing for each layer from the lowest layer to upper
    fn get_timing_digest(&self) -> Vec<Option<TimingDigest>>;
    fn get_read_pending_time(&self) -> Duration {
        Duration::ZERO
    }
    fn get_write_pending_time(&self) -> Duration {
        Duration::ZERO
    }
}

/// The interface to set or return proxy information
pub trait GetProxyDigest {
    fn get_proxy_digest(&self) -> Option<Arc<ProxyDigest>>;
    fn set_proxy_digest(&mut self, _digest: ProxyDigest) {}
}

/// The interface to set or return socket information
pub trait GetSocketDigest {
    fn get_socket_digest(&self) -> Option<Arc<SocketDigest>>;
    fn set_socket_digest(&mut self, _socket_digest: SocketDigest) {}
}


================================================
FILE: pingora-core/src/protocols/http/body_buffer.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use bytes::{Bytes, BytesMut};

/// A buffer with size limit. When the total amount of data written to the buffer is below the limit
/// all the data will be held in the buffer. Otherwise, the buffer will report to be truncated.
pub struct FixedBuffer {
    buffer: BytesMut,
    capacity: usize,
    truncated: bool,
}

impl FixedBuffer {
    pub fn new(capacity: usize) -> Self {
        FixedBuffer {
            buffer: BytesMut::new(),
            capacity,
            truncated: false,
        }
    }

    // TODO: maybe store a Vec of Bytes for zero-copy
    pub fn write_to_buffer(&mut self, data: &Bytes) {
        if !self.truncated && (self.buffer.len() + data.len() <= self.capacity) {
            self.buffer.extend_from_slice(data);
        } else {
            // TODO: clear data because the data held here is useless anyway?
            self.truncated = true;
        }
    }
    pub fn clear(&mut self) {
        self.truncated = false;
        self.buffer.clear();
    }
    pub fn is_empty(&self) -> bool {
        self.buffer.len() == 0
    }
    pub fn is_truncated(&self) -> bool {
        self.truncated
    }
    pub fn get_buffer(&self) -> Option<Bytes> {
        // TODO: return None if truncated?
        if !self.is_empty() {
            Some(self.buffer.clone().freeze())
        } else {
            None
        }
    }
}


================================================
FILE: pingora-core/src/protocols/http/bridge/grpc_web.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use bytes::{BufMut, Bytes, BytesMut};
use http::{
    header::{CONTENT_LENGTH, CONTENT_TYPE, TRANSFER_ENCODING},
    HeaderMap,
};
use pingora_error::{ErrorType::ReadError, OrErr, Result};
use pingora_http::{RequestHeader, ResponseHeader};

/// Used for bridging gRPC to gRPC-web and vice-versa.
/// See gRPC-web [spec](https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-WEB.md) and
/// gRPC h2 [spec](https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md) for more details.
#[derive(Default, PartialEq, Debug)]
pub enum GrpcWebCtx {
    #[default]
    Disabled,
    Init,
    Upgrade,
    Trailers,
    Done,
}

const GRPC: &str = "application/grpc";
const GRPC_WEB: &str = "application/grpc-web";

impl GrpcWebCtx {
    pub fn init(&mut self) {
        *self = Self::Init;
    }

    /// gRPC-web request is fed into this filter, if the module is initialized
    /// we attempt to convert it to a gRPC request
    pub fn request_header_filter(&mut self, req: &mut RequestHeader) {
        if *self != Self::Init {
            // not enabled
            return;
        }

        let content_type = req
            .headers
            .get(CONTENT_TYPE)
            .and_then(|v| v.to_str().ok())
            .unwrap_or_default();

        // check we have a valid grpc-web prefix
        if !(content_type.len() >= GRPC_WEB.len()
            && content_type[..GRPC_WEB.len()].eq_ignore_ascii_case(GRPC_WEB))
        {
            // not gRPC-web
            return;
        }

        // change content type to grpc
        let ct = content_type.to_lowercase().replace(GRPC_WEB, GRPC);
        req.insert_header(CONTENT_TYPE, ct).expect("insert header");

        // The 'te' request header is used to detect incompatible proxies
        // which are supposed to remove 'te' if it is unsupported.
        // This header is required by gRPC over h2 protocol.
        // https://github.com/grpc/grpc/blob/master/doc/PROTOCOL-HTTP2.md
        req.insert_header("te", "trailers").expect("insert header");

        // For gRPC requests, EOS (end-of-stream) is indicated by the presence of the
        // END_STREAM flag on the last received DATA frame.
        // In scenarios where the Request stream needs to be closed
        // but no data remains to be sent implementations
        // MUST send an empty DATA frame with this flag set.
        req.set_send_end_stream(false);

        *self = Self::Upgrade
    }

    /// gRPC response is fed into this filter, if the module is in the bridge state
    /// attempt to convert the response it to a gRPC-web response
    pub fn response_header_filter(&mut self, resp: &mut ResponseHeader) {
        if *self != Self::Upgrade {
            // not an upgrade
            return;
        }

        if resp.status.is_informational() {
            // proxy informational statuses through
            return;
        }

        let content_type = resp
            .headers
            .get(CONTENT_TYPE)
            .and_then(|v| v.to_str().ok())
            .unwrap_or_default();

        // upstream h2, no reason to normalize case
        if !content_type.starts_with(GRPC) {
            // not gRPC
            *self = Self::Disabled;
            return;
        }

        // change content type to gRPC-web
        let ct = content_type.replace(GRPC, GRPC_WEB);
        resp.insert_header(CONTENT_TYPE, ct).expect("insert header");

        // always use chunked for gRPC-web
        resp.remove_header(&CONTENT_LENGTH);
        resp.insert_header(TRANSFER_ENCODING, "chunked")
            .expect("insert header");

        *self = Self::Trailers
    }

    /// Used to convert gRPC trailers into gRPC-web trailers, note
    /// gRPC-web trailers are encoded into the response body so we return
    /// the encoded bytes here.
    pub fn response_trailer_filter(
        &mut self,
        resp_trailers: &mut HeaderMap,
    ) -> Result<Option<Bytes>> {
        /* Trailer header frame and trailer headers
            0 - - 1 - - 2 - - 3 - - 4 - - 5 - - 6 - - 7 - - 8
            | Ind |        Length         |     Headers     | <- trailer header indicator, length of headers
            |                    Headers                    | <- rest is headers
            |                    Headers                    |
        */
        // TODO compressed trailer?
        // grpc-web trailers frame head
        const GRPC_WEB_TRAILER: u8 = 0x80;

        // number of bytes in trailer header
        const GRPC_TRAILER_HEADER_LEN: usize = 5;

        // just some estimate
        const DEFAULT_TRAILER_BUFFER_SIZE: usize = 256;

        if *self != Self::Trailers {
            // not an upgrade
            *self = Self::Disabled;
            return Ok(None);
        }

        // trailers are expected to arrive all at once encoded into a single trailers frame
        // trailers in frame are separated by CRLFs
        let mut buf = BytesMut::with_capacity(DEFAULT_TRAILER_BUFFER_SIZE);
        let mut trailers = buf.split_off(GRPC_TRAILER_HEADER_LEN);

        // iterate the key/value pairs and encode them into the tmp buffer
        for (key, value) in resp_trailers.iter() {
            // encode header
            trailers.put_slice(key.as_ref());
            trailers.put_slice(b":");

            // encode value
            trailers.put_slice(value.as_ref());

            // encode header separator
            trailers.put_slice(b"\r\n");
        }

        // ensure trailer length within u32
        let len = trailers.len().try_into().or_err_with(ReadError, || {
            format!("invalid gRPC trailer length: {}", trailers.len())
        })?;
        buf.put_u8(GRPC_WEB_TRAILER);
        buf.put_u32(len);
        buf.unsplit(trailers);

        *self = Self::Done;
        Ok(Some(buf.freeze()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use http::{request::Request, response::Response, Version};

    #[test]
    fn non_grpc_web_request_ignored() {
        let request = Request::get("https://pingora.dev/")
            .header(CONTENT_TYPE, "application/grpc-we")
            .version(Version::HTTP_2) // only set this to verify send_end_stream is configured
            .body(())
            .unwrap();
        let mut request = request.into_parts().0.into();

        let mut filter = GrpcWebCtx::default();
        filter.init();
        filter.request_header_filter(&mut request);
        assert_eq!(filter, GrpcWebCtx::Init);

        let headers = &request.headers;
        assert_eq!(headers.get("te"), None);
        assert_eq!(headers.get("application/grpc"), None);
        assert_eq!(request.send_end_stream(), Some(true));
    }

    #[test]
    fn grpc_web_request_module_disabled_ignored() {
        let request = Request::get("https://pingora.dev/")
            .header(CONTENT_TYPE, "application/grpc-web")
            .version(Version::HTTP_2) // only set this to verify send_end_stream is configured
            .body(())
            .unwrap();
        let mut request = request.into_parts().0.into();

        // do not init
        let mut filter = GrpcWebCtx::default();
        filter.request_header_filter(&mut request);
        assert_eq!(filter, GrpcWebCtx::Disabled);

        let headers = &request.headers;
        assert_eq!(headers.get("te"), None);
        assert_eq!(headers.get(CONTENT_TYPE).unwrap(), "application/grpc-web");
        assert_eq!(request.send_end_stream(), Some(true));
    }

    #[test]
    fn grpc_web_request_upgrade() {
        let request = Request::get("https://pingora.org/")
            .header(CONTENT_TYPE, "application/gRPC-web+thrift")
            .version(Version::HTTP_2) // only set this to verify send_end_stream is configured
            .body(())
            .unwrap();
        let mut request = request.into_parts().0.into();

        let mut filter = GrpcWebCtx::default();
        filter.init();
        filter.request_header_filter(&mut request);
        assert_eq!(filter, GrpcWebCtx::Upgrade);

        let headers = &request.headers;
        assert_eq!(headers.get("te").unwrap(), "trailers");
        assert_eq!(
            headers.get(CONTENT_TYPE).unwrap(),
            "application/grpc+thrift"
        );
        assert_eq!(request.send_end_stream(), Some(false));
    }

    #[test]
    fn non_grpc_response_ignored() {
        let response = Response::builder()
            .header(CONTENT_TYPE, "text/html")
            .header(CONTENT_LENGTH, "10")
            .body(())
            .unwrap();
        let mut response = response.into_parts().0.into();

        let mut filter = GrpcWebCtx::Upgrade;
        filter.response_header_filter(&mut response);
        assert_eq!(filter, GrpcWebCtx::Disabled);

        let headers = &response.headers;
        assert_eq!(headers.get(CONTENT_TYPE).unwrap(), "text/html");
        assert_eq!(headers.get(CONTENT_LENGTH).unwrap(), "10");
    }

    #[test]
    fn grpc_response_module_disabled_ignored() {
        let response = Response::builder()
            .header(CONTENT_TYPE, "application/grpc")
            .body(())
            .unwrap();
        let mut response = response.into_parts().0.into();

        let mut filter = GrpcWebCtx::default();
        filter.response_header_filter(&mut response);
        assert_eq!(filter, GrpcWebCtx::Disabled);

        let headers = &response.headers;
        assert_eq!(headers.get(CONTENT_TYPE).unwrap(), "application/grpc");
    }

    #[test]
    fn grpc_response_upgrade() {
        let response = Response::builder()
            .header(CONTENT_TYPE, "application/grpc+proto")
            .header(CONTENT_LENGTH, "0")
            .body(())
            .unwrap();
        let mut response = response.into_parts().0.into();

        let mut filter = GrpcWebCtx::Upgrade;
        filter.response_header_filter(&mut response);
        assert_eq!(filter, GrpcWebCtx::Trailers);

        let headers = &response.headers;
        assert_eq!(
            headers.get(CONTENT_TYPE).unwrap(),
            "application/grpc-web+proto"
        );
        assert_eq!(headers.get(TRANSFER_ENCODING).unwrap(), "chunked");
        assert!(headers.get(CONTENT_LENGTH).is_none());
    }

    #[test]
    fn grpc_response_informational_proxied() {
        let response = Response::builder().status(100).body(()).unwrap();
        let mut response = response.into_parts().0.into();

        let mut filter = GrpcWebCtx::Upgrade;
        filter.response_header_filter(&mut response);
        assert_eq!(filter, GrpcWebCtx::Upgrade); // still upgrade
    }

    #[test]
    fn grpc_response_trailer_headers_convert_to_byte_buf() {
        let mut response = Response::builder()
            .header("grpc-status", "0")
            .header("grpc-message", "OK")
            .body(())
            .unwrap();
        let response = response.headers_mut();

        let mut filter = GrpcWebCtx::Trailers;
        let buf = filter.response_trailer_filter(response).unwrap().unwrap();
        assert_eq!(filter, GrpcWebCtx::Done);

        let expected = b"grpc-status:0\r\ngrpc-message:OK\r\n";
        let expected_len: u32 = expected.len() as u32; // 32 bytes

        // assert the length prefix message frame
        // [1 byte (header)| 4 byte (length) | 15 byte (grpc-status:0\r\n) | 17 bytes (grpc-message:OK\r\n)]
        assert_eq!(0x80, buf[0]); // frame should start with trailer header
        assert_eq!(expected_len.to_be_bytes(), buf[1..5]); // next 4 bytes length of trailer
        assert_eq!(expected[..15], buf[5..20]); // grpc-status:0\r\n (15 bytes)
        assert_eq!(expected[15..], buf[20..]); // grpc-message:OK\r\n (17 bytes)
    }
}


================================================
FILE: pingora-core/src/protocols/http/bridge/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod grpc_web;


================================================
FILE: pingora-core/src/protocols/http/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use bytes::Bytes;
use pingora_error::Result;
use pingora_http::{RequestHeader, ResponseHeader};
use std::time::Duration;

use super::v2::client::Http2Session;
use super::{custom::client::Session, v1::client::HttpSession as Http1Session};
use crate::protocols::{Digest, SocketAddr, Stream};

/// A type for Http client session. It can be either an Http1 connection or an Http2 stream.
pub enum HttpSession<S = ()> {
    H1(Http1Session),
    H2(Http2Session),
    Custom(S),
}

impl<S: Session> HttpSession<S> {
    pub fn as_http1(&self) -> Option<&Http1Session> {
        match self {
            Self::H1(s) => Some(s),
            Self::H2(_) => None,
            Self::Custom(_) => None,
        }
    }

    pub fn as_http2(&self) -> Option<&Http2Session> {
        match self {
            Self::H1(_) => None,
            Self::H2(s) => Some(s),
            Self::Custom(_) => None,
        }
    }

    pub fn as_custom(&self) -> Option<&S> {
        match self {
            Self::H1(_) => None,
            Self::H2(_) => None,
            Self::Custom(c) => Some(c),
        }
    }

    pub fn as_custom_mut(&mut self) -> Option<&mut S> {
        match self {
            Self::H1(_) => None,
            Self::H2(_) => None,
            Self::Custom(c) => Some(c),
        }
    }

    /// Write the request header to the server
    /// After the request header is sent. The caller can either start reading the response or
    /// sending request body if any.
    pub async fn write_request_header(&mut self, req: Box<RequestHeader>) -> Result<()> {
        match self {
            HttpSession::H1(h1) => {
                h1.write_request_header(req).await?;
                Ok(())
            }
            HttpSession::H2(h2) => h2.write_request_header(req, false),
            HttpSession::Custom(c) => c.write_request_header(req, false).await,
        }
    }

    /// Write a chunk of the request body.
    pub async fn write_request_body(&mut self, data: Bytes, end: bool) -> Result<()> {
        match self {
            HttpSession::H1(h1) => {
                // TODO: maybe h1 should also have the concept of `end`
                h1.write_body(&data).await?;
                Ok(())
            }
            HttpSession::H2(h2) => h2.write_request_body(data, end).await,
            HttpSession::Custom(c) => c.write_request_body(data, end).await,
        }
    }

    /// Signal that the request body has ended
    pub async fn finish_request_body(&mut self) -> Result<()> {
        match self {
            HttpSession::H1(h1) => {
                h1.finish_body().await?;
                Ok(())
            }
            HttpSession::H2(h2) => h2.finish_request_body(),
            HttpSession::Custom(c) => c.finish_request_body().await,
        }
    }

    /// Set the read timeout for reading header and body.
    ///
    /// The timeout is per read operation, not on the overall time reading the entire response
    pub fn set_read_timeout(&mut self, timeout: Option<Duration>) {
        match self {
            HttpSession::H1(h1) => h1.read_timeout = timeout,
            HttpSession::H2(h2) => h2.read_timeout = timeout,
            HttpSession::Custom(c) => c.set_read_timeout(timeout),
        }
    }

    /// Set the write timeout for writing header and body.
    ///
    /// The timeout is per write operation, not on the overall time writing the entire request.
    pub fn set_write_timeout(&mut self, timeout: Option<Duration>) {
        match self {
            HttpSession::H1(h1) => h1.write_timeout = timeout,
            HttpSession::H2(h2) => h2.write_timeout = timeout,
            HttpSession::Custom(c) => c.set_write_timeout(timeout),
        }
    }

    /// Read the response header from the server
    /// For http1, this function can be called multiple times, if the headers received are just
    /// informational headers.
    pub async fn read_response_header(&mut self) -> Result<()> {
        match self {
            HttpSession::H1(h1) => {
                h1.read_response().await?;
                Ok(())
            }
            HttpSession::H2(h2) => h2.read_response_header().await,
            HttpSession::Custom(c) => c.read_response_header().await,
        }
    }

    /// Read response body
    ///
    /// `None` when no more body to read.
    pub async fn read_response_body(&mut self) -> Result<Option<Bytes>> {
        match self {
            HttpSession::H1(h1) => h1.read_body_bytes().await,
            HttpSession::H2(h2) => h2.read_response_body().await,
            HttpSession::Custom(c) => c.read_response_body().await,
        }
    }

    /// No (more) body to read
    pub fn response_done(&mut self) -> bool {
        match self {
            HttpSession::H1(h1) => h1.is_body_done(),
            HttpSession::H2(h2) => h2.response_finished(),
            HttpSession::Custom(c) => c.response_finished(),
        }
    }

    /// Give up the http session abruptly.
    /// For H1 this will close the underlying connection
    /// For H2 this will send RST_STREAM frame to end this stream if the stream has not ended at all
    pub async fn shutdown(&mut self) {
        match self {
            Self::H1(s) => s.shutdown().await,
            Self::H2(s) => s.shutdown(),
            Self::Custom(c) => c.shutdown(0, "shutdown").await,
        }
    }

    /// Get the response header of the server
    ///
    /// `None` if the response header is not read yet.
    pub fn response_header(&self) -> Option<&ResponseHeader> {
        match self {
            Self::H1(s) => s.resp_header(),
            Self::H2(s) => s.response_header(),
            Self::Custom(c) => c.response_header(),
        }
    }

    /// Return the [Digest] of the connection
    ///
    /// For reused connection, the timing in the digest will reflect its initial handshakes
    /// The caller should check if the connection is reused to avoid misuse of the timing field.
    pub fn digest(&self) -> Option<&Digest> {
        match self {
            Self::H1(s) => Some(s.digest()),
            Self::H2(s) => s.digest(),
            Self::Custom(c) => c.digest(),
        }
    }

    /// Return a mutable [Digest] reference for the connection.
    ///
    /// Will return `None` if this is an H2 session and multiple streams are open.
    pub fn digest_mut(&mut self) -> Option<&mut Digest> {
        match self {
            Self::H1(s) => Some(s.digest_mut()),
            Self::H2(s) => s.digest_mut(),
            Self::Custom(s) => s.digest_mut(),
        }
    }

    /// Return the server (peer) address of the connection.
    pub fn server_addr(&self) -> Option<&SocketAddr> {
        match self {
            Self::H1(s) => s.server_addr(),
            Self::H2(s) => s.server_addr(),
            Self::Custom(s) => s.server_addr(),
        }
    }

    /// Return the client (local) address of the connection.
    pub fn client_addr(&self) -> Option<&SocketAddr> {
        match self {
            Self::H1(s) => s.client_addr(),
            Self::H2(s) => s.client_addr(),
            Self::Custom(s) => s.client_addr(),
        }
    }

    /// Get the reference of the [Stream] that this HTTP/1 session is operating upon.
    /// None if the HTTP session is over H2
    pub fn stream(&self) -> Option<&Stream> {
        match self {
            Self::H1(s) => Some(s.stream()),
            Self::H2(_) => None,
            Self::Custom(_) => None,
        }
    }
}


================================================
FILE: pingora-core/src/protocols/http/compression/brotli.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::Encode;
use super::COMPRESSION_ERROR;

use brotli::{CompressorWriter, DecompressorWriter};
use bytes::Bytes;
use pingora_error::{OrErr, Result};
use std::io::Write;
use std::time::{Duration, Instant};

pub struct Decompressor {
    decompress: DecompressorWriter<Vec<u8>>,
    total_in: usize,
    total_out: usize,
    duration: Duration,
}

impl Decompressor {
    pub fn new() -> Self {
        Decompressor {
            // default buf is 4096 if 0 is used, TODO: figure out the significance of this value
            decompress: DecompressorWriter::new(vec![], 0),
            total_in: 0,
            total_out: 0,
            duration: Duration::new(0, 0),
        }
    }
}

impl Encode for Decompressor {
    fn encode(&mut self, input: &[u8], end: bool) -> Result<Bytes> {
        const MAX_INIT_COMPRESSED_SIZE_CAP: usize = 4 * 1024;
        // Brotli compress ratio can be 3.5 to 4.5
        const ESTIMATED_COMPRESSION_RATIO: usize = 4;
        let start = Instant::now();
        self.total_in += input.len();
        // cap the buf size amplification, there is a DoS risk of always allocate
        // 4x the memory of the input buffer
        let reserve_size = if input.len() < MAX_INIT_COMPRESSED_SIZE_CAP {
            input.len() * ESTIMATED_COMPRESSION_RATIO
        } else {
            input.len()
        };
        self.decompress.get_mut().reserve(reserve_size);
        self.decompress
            .write_all(input)
            .or_err(COMPRESSION_ERROR, "while decompress Brotli")?;
        // write to vec will never fail. The only possible error is that the input data
        // is invalid (not brotli compressed)
        if end {
            self.decompress
                .flush()
                .or_err(COMPRESSION_ERROR, "while decompress Brotli")?;
        }
        self.total_out += self.decompress.get_ref().len();
        self.duration += start.elapsed();
        Ok(std::mem::take(self.decompress.get_mut()).into()) // into() Bytes will drop excess capacity
    }

    fn stat(&self) -> (&'static str, usize, usize, Duration) {
        ("de-brotli", self.total_in, self.total_out, self.duration)
    }
}

pub struct Compressor {
    compress: CompressorWriter<Vec<u8>>,
    total_in: usize,
    total_out: usize,
    duration: Duration,
}

impl Compressor {
    pub fn new(level: u32) -> Self {
        Compressor {
            // buf_size:4096 , lgwin:19 TODO: fine tune these
            compress: CompressorWriter::new(vec![], 4096, level, 19),
            total_in: 0,
            total_out: 0,
            duration: Duration::new(0, 0),
        }
    }
}

impl Encode for Compressor {
    fn encode(&mut self, input: &[u8], end: bool) -> Result<Bytes> {
        // reserve at most 16k
        const MAX_INIT_COMPRESSED_BUF_SIZE: usize = 16 * 1024;
        let start = Instant::now();
        self.total_in += input.len();

        // reserve at most input size, cap at 16k, compressed output should be smaller
        self.compress
            .get_mut()
            .reserve(std::cmp::min(MAX_INIT_COMPRESSED_BUF_SIZE, input.len()));
        self.compress
            .write_all(input)
            .or_err(COMPRESSION_ERROR, "while compress Brotli")?;
        // write to vec will never fail.
        if end {
            self.compress
                .flush()
                .or_err(COMPRESSION_ERROR, "while compress Brotli")?;
        }
        self.total_out += self.compress.get_ref().len();
        self.duration += start.elapsed();
        Ok(std::mem::take(self.compress.get_mut()).into()) // into() Bytes will drop excess capacity
    }

    fn stat(&self) -> (&'static str, usize, usize, Duration) {
        ("brotli", self.total_in, self.total_out, self.duration)
    }
}

#[cfg(test)]
mod tests_stream {
    use super::*;

    #[test]
    fn decompress_brotli_data() {
        let mut compressor = Decompressor::new();
        let decompressed = compressor
            .encode(
                &[
                    0x1f, 0x0f, 0x00, 0xf8, 0x45, 0x07, 0x87, 0x3e, 0x10, 0xfb, 0x55, 0x92, 0xec,
                    0x12, 0x09, 0xcc, 0x38, 0xdd, 0x51, 0x1e,
                ],
                true,
            )
            .unwrap();

        assert_eq!(&decompressed[..], &b"adcdefgabcdefgh\n"[..]);
    }

    #[test]
    fn compress_brotli_data() {
        let mut compressor = Compressor::new(11);
        let compressed = compressor.encode(&b"adcdefgabcdefgh\n"[..], true).unwrap();

        assert_eq!(
            &compressed[..],
            &[
                0x85, 0x07, 0x00, 0xf8, 0x45, 0x07, 0x87, 0x3e, 0x10, 0xfb, 0x55, 0x92, 0xec, 0x12,
                0x09, 0xcc, 0x38, 0xdd, 0x51, 0x1e,
            ],
        );
    }
}


================================================
FILE: pingora-core/src/protocols/http/compression/gzip.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::{Encode, COMPRESSION_ERROR};

use bytes::Bytes;
use flate2::write::{GzDecoder, GzEncoder};
use pingora_error::{OrErr, Result};
use std::io::Write;
use std::time::{Duration, Instant};

pub struct Decompressor {
    decompress: GzDecoder<Vec<u8>>,
    total_in: usize,
    total_out: usize,
    duration: Duration,
}

impl Decompressor {
    pub fn new() -> Self {
        Decompressor {
            decompress: GzDecoder::new(vec![]),
            total_in: 0,
            total_out: 0,
            duration: Duration::new(0, 0),
        }
    }
}

impl Encode for Decompressor {
    fn encode(&mut self, input: &[u8], end: bool) -> Result<Bytes> {
        const MAX_INIT_COMPRESSED_SIZE_CAP: usize = 4 * 1024;
        const ESTIMATED_COMPRESSION_RATIO: usize = 3; // estimated 2.5-3x compression
        let start = Instant::now();
        self.total_in += input.len();
        // cap the buf size amplification, there is a DoS risk of always allocate
        // 3x the memory of the input buffer
        let reserve_size = if input.len() < MAX_INIT_COMPRESSED_SIZE_CAP {
            input.len() * ESTIMATED_COMPRESSION_RATIO
        } else {
            input.len()
        };
        self.decompress.get_mut().reserve(reserve_size);
        self.decompress
            .write_all(input)
            .or_err(COMPRESSION_ERROR, "while decompress Gzip")?;
        // write to vec will never fail, only possible error is that the input data
        // was not actually gzip compressed
        if end {
            self.decompress
                .try_finish()
                .or_err(COMPRESSION_ERROR, "while decompress Gzip")?;
        }
        self.total_out += self.decompress.get_ref().len();
        self.duration += start.elapsed();
        Ok(std::mem::take(self.decompress.get_mut()).into()) // into() Bytes will drop excess capacity
    }

    fn stat(&self) -> (&'static str, usize, usize, Duration) {
        ("de-gzip", self.total_in, self.total_out, self.duration)
    }
}

pub struct Compressor {
    // TODO: enum for other compression algorithms
    compress: GzEncoder<Vec<u8>>,
    total_in: usize,
    total_out: usize,
    duration: Duration,
}

impl Compressor {
    pub fn new(level: u32) -> Compressor {
        Compressor {
            compress: GzEncoder::new(vec![], flate2::Compression::new(level)),
            total_in: 0,
            total_out: 0,
            duration: Duration::new(0, 0),
        }
    }
}

impl Encode for Compressor {
    // infallible because compression can take any data
    fn encode(&mut self, input: &[u8], end: bool) -> Result<Bytes> {
        // reserve at most 16k
        const MAX_INIT_COMPRESSED_BUF_SIZE: usize = 16 * 1024;
        let start = Instant::now();
        self.total_in += input.len();
        self.compress
            .get_mut()
            .reserve(std::cmp::min(MAX_INIT_COMPRESSED_BUF_SIZE, input.len()));
        self.write_all(input).unwrap(); // write to vec, should never fail
        if end {
            self.try_finish().unwrap(); // write to vec, should never fail
        }
        self.total_out += self.compress.get_ref().len();
        self.duration += start.elapsed();
        Ok(std::mem::take(self.compress.get_mut()).into()) // into() Bytes will drop excess capacity
    }

    fn stat(&self) -> (&'static str, usize, usize, Duration) {
        ("gzip", self.total_in, self.total_out, self.duration)
    }
}

use std::ops::{Deref, DerefMut};
impl Deref for Decompressor {
    type Target = GzDecoder<Vec<u8>>;

    fn deref(&self) -> &Self::Target {
        &self.decompress
    }
}

impl DerefMut for Decompressor {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.decompress
    }
}

impl Deref for Compressor {
    type Target = GzEncoder<Vec<u8>>;

    fn deref(&self) -> &Self::Target {
        &self.compress
    }
}

impl DerefMut for Compressor {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.compress
    }
}

#[cfg(test)]
mod tests_stream {
    use super::*;

    #[test]
    fn gzip_data() {
        let mut compressor = Compressor::new(6);
        let compressed = compressor.encode(b"abcdefg", true).unwrap();
        // gzip magic headers
        assert_eq!(&compressed[..3], &[0x1f, 0x8b, 0x08]);
        // check the crc32 footer
        assert_eq!(
            &compressed[compressed.len() - 9..],
            &[0, 166, 106, 42, 49, 7, 0, 0, 0]
        );
        assert_eq!(compressor.total_in, 7);
        assert_eq!(compressor.total_out, compressed.len());

        assert!(compressor.get_ref().is_empty());
    }

    #[test]
    fn gunzip_data() {
        let mut decompressor = Decompressor::new();

        let compressed_bytes = &[
            0x1f, 0x8b, 0x08, 0, 0, 0, 0, 0, 0, 255, 75, 76, 74, 78, 73, 77, 75, 7, 0, 166, 106,
            42, 49, 7, 0, 0, 0,
        ];
        let decompressed = decompressor.encode(compressed_bytes, true).unwrap();

        assert_eq!(&decompressed[..], b"abcdefg");
        assert_eq!(decompressor.total_in, compressed_bytes.len());
        assert_eq!(decompressor.total_out, decompressed.len());

        assert!(decompressor.get_ref().is_empty());
    }
}


================================================
FILE: pingora-core/src/protocols/http/compression/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP response (de)compression libraries
//!
//! Gzip, Brotli, Zstd, and dictionary-compressed Zstd (dcz, [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842)) are supported.

use super::HttpTask;

use bytes::Bytes;
use log::{debug, warn};
use pingora_error::{ErrorType, Result};
use pingora_http::{RequestHeader, ResponseHeader};
use std::time::Duration;

use strum::EnumCount;
use strum_macros::EnumCount as EnumCountMacro;

mod brotli;
mod gzip;
mod zstd;

/// Re-export [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842) constants for external use.
pub use zstd::{DCZ_HEADER_SIZE, DCZ_MAGIC};

/// The type of error to return when (de)compression fails
pub const COMPRESSION_ERROR: ErrorType = ErrorType::new("CompressionError");

/// The trait for both compress and decompress because the interface and syntax are the same:
/// encode some bytes to other bytes
pub trait Encode {
    /// Encode the input bytes. The `end` flag signals the end of the entire input. The `end` flag
    /// helps the encoder to flush out the remaining buffered encoded data because certain compression
    /// algorithms prefer to collect large enough data to compress all together.
    fn encode(&mut self, input: &[u8], end: bool) -> Result<Bytes>;
    /// Return the Encoder's name, the total input bytes, the total output bytes and the total
    /// duration spent on encoding the data.
    fn stat(&self) -> (&'static str, usize, usize, Duration);
}

/// The response compression object. Currently support gzip compression and brotli decompression.
///
/// To use it, the caller should create a [`ResponseCompressionCtx`] per HTTP session.
/// The caller should call the corresponding filters for the request header, response header and
/// response body. If the algorithms are supported, the output response body will be encoded.
/// The response header will be adjusted accordingly as well. If the algorithm is not supported
/// or no encoding is needed, the response is untouched.
///
/// If configured and if the request's `accept-encoding` header contains the algorithm supported and the
/// incoming response doesn't have that encoding, the filter will compress the response.
/// If configured and supported, and if the incoming response's `content-encoding` isn't one of the
/// request's `accept-encoding` supported algorithm, the ctx will decompress the response.
///
/// # Currently supported algorithms and actions
/// - Brotli decompression: if the response is br compressed, this ctx can decompress it
/// - Gzip compression: if the response is uncompressed, this ctx can compress it with gzip
pub struct ResponseCompressionCtx(CtxInner);

/// Dictionary data for [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842) shared dictionary compression.
#[derive(Clone, Debug)]
pub struct DictionaryData {
    pub bytes: Bytes,
    pub hash: [u8; 32],
}

enum CtxInner {
    HeaderPhase {
        // Store the preferred list to compare with content-encoding
        accept_encoding: Vec<Algorithm>,
        encoding_levels: [u32; Algorithm::COUNT],
        decompress_enable: [bool; Algorithm::COUNT],
        preserve_etag: [bool; Algorithm::COUNT],
        // Optional dictionary for dcz compression (RFC 9842).
        dictionary: Option<DictionaryData>,
    },
    BodyPhase(Option<Box<dyn Encode + Send + Sync>>),
}

impl ResponseCompressionCtx {
    /// Create a new [`ResponseCompressionCtx`] with the expected compression level. `0` will disable
    /// the compression. The compression level is applied across all algorithms.
    /// The `decompress_enable` flag will tell the ctx to decompress if needed.
    /// The `preserve_etag` flag indicates whether the ctx should avoid modifying the etag,
    /// which will otherwise be weakened if the flag is false and (de)compression is applied.
    pub fn new(compression_level: u32, decompress_enable: bool, preserve_etag: bool) -> Self {
        Self(CtxInner::HeaderPhase {
            accept_encoding: Vec::new(),
            encoding_levels: [compression_level; Algorithm::COUNT],
            decompress_enable: [decompress_enable; Algorithm::COUNT],
            preserve_etag: [preserve_etag; Algorithm::COUNT],
            dictionary: None,
        })
    }

    /// Whether the encoder is enabled.
    /// The enablement will change according to the request and response filter by this ctx.
    pub fn is_enabled(&self) -> bool {
        match &self.0 {
            CtxInner::HeaderPhase {
                decompress_enable,
                encoding_levels: levels,
                ..
            } => levels.iter().any(|l| *l != 0) || decompress_enable.iter().any(|d| *d),
            CtxInner::BodyPhase(c) => c.is_some(),
        }
    }

    /// Return the stat of this ctx:
    /// algorithm name, in bytes, out bytes, time took for the compression
    pub fn get_info(&self) -> Option<(&'static str, usize, usize, Duration)> {
        match &self.0 {
            CtxInner::HeaderPhase { .. } => None,
            CtxInner::BodyPhase(c) => c.as_ref().map(|c| c.stat()),
        }
    }

    /// Adjust the compression level for all compression algorithms.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn adjust_level(&mut self, new_level: u32) {
        match &mut self.0 {
            CtxInner::HeaderPhase {
                encoding_levels: levels,
                ..
            } => {
                *levels = [new_level; Algorithm::COUNT];
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Adjust the compression level for a specific algorithm.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn adjust_algorithm_level(&mut self, algorithm: Algorithm, new_level: u32) {
        match &mut self.0 {
            CtxInner::HeaderPhase {
                encoding_levels: levels,
                ..
            } => {
                levels[algorithm.index()] = new_level;
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Adjust the decompression flag for all compression algorithms.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn adjust_decompression(&mut self, enabled: bool) {
        match &mut self.0 {
            CtxInner::HeaderPhase {
                decompress_enable, ..
            } => {
                *decompress_enable = [enabled; Algorithm::COUNT];
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Adjust the decompression flag for a specific algorithm.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn adjust_algorithm_decompression(&mut self, algorithm: Algorithm, enabled: bool) {
        match &mut self.0 {
            CtxInner::HeaderPhase {
                decompress_enable, ..
            } => {
                decompress_enable[algorithm.index()] = enabled;
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Adjust preserve etag setting.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn adjust_preserve_etag(&mut self, enabled: bool) {
        match &mut self.0 {
            CtxInner::HeaderPhase { preserve_etag, .. } => {
                *preserve_etag = [enabled; Algorithm::COUNT];
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Adjust preserve etag setting for a specific algorithm.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn adjust_algorithm_preserve_etag(&mut self, algorithm: Algorithm, enabled: bool) {
        match &mut self.0 {
            CtxInner::HeaderPhase { preserve_etag, .. } => {
                preserve_etag[algorithm.index()] = enabled;
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Set the dictionary for [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842) dictionary compression.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn set_dictionary(&mut self, dictionary_bytes: Bytes, dictionary_hash: [u8; 32]) {
        match &mut self.0 {
            CtxInner::HeaderPhase { dictionary, .. } => {
                *dictionary = Some(DictionaryData {
                    bytes: dictionary_bytes,
                    hash: dictionary_hash,
                });
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Check if a dictionary has been set.
    pub fn has_dictionary(&self) -> bool {
        match &self.0 {
            CtxInner::HeaderPhase { dictionary, .. } => dictionary.is_some(),
            CtxInner::BodyPhase(_) => false,
        }
    }

    /// Clear any previously set dictionary.
    /// # Panic
    /// This function will panic if it has already started encoding the response body.
    pub fn clear_dictionary(&mut self) {
        match &mut self.0 {
            CtxInner::HeaderPhase { dictionary, .. } => {
                *dictionary = None;
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Feed the request header into this ctx.
    pub fn request_filter(&mut self, req: &RequestHeader) {
        if !self.is_enabled() {
            return;
        }
        match &mut self.0 {
            CtxInner::HeaderPhase {
                accept_encoding, ..
            } => parse_accept_encoding(
                req.headers.get(http::header::ACCEPT_ENCODING),
                accept_encoding,
            ),
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Feed the response header into this ctx
    pub fn response_header_filter(&mut self, resp: &mut ResponseHeader, end: bool) {
        if !self.is_enabled() {
            return;
        }
        match &self.0 {
            CtxInner::HeaderPhase {
                decompress_enable,
                preserve_etag,
                accept_encoding,
                encoding_levels: levels,
                dictionary,
            } => {
                if resp.status.is_informational() {
                    if resp.status == http::status::StatusCode::SWITCHING_PROTOCOLS {
                        // no transformation for websocket (TODO: cite RFC)
                        self.0 = CtxInner::BodyPhase(None);
                    }
                    // else, wait for the final response header for decision
                    return;
                }
                // do nothing if no body
                if end {
                    self.0 = CtxInner::BodyPhase(None);
                    return;
                }

                if depends_on_accept_encoding(
                    resp,
                    levels.iter().any(|level| *level != 0),
                    decompress_enable,
                ) {
                    // The response depends on the Accept-Encoding header, make sure to indicate it
                    // in the Vary response header.
                    // https://www.rfc-editor.org/rfc/rfc9110#name-vary
                    add_vary_header(resp, &http::header::ACCEPT_ENCODING);
                }

                let action = decide_action(resp, accept_encoding);
                debug!("compression action: {action:?}");
                let (encoder, preserve_etag) = match action {
                    Action::Noop => (None, false),
                    Action::Compress(algorithm) => {
                        let idx = algorithm.index();
                        let compressor = match algorithm {
                            Algorithm::Dcz => dictionary.as_ref().and_then(|d| {
                                algorithm.maybe_compressor_with_dictionary(levels[idx], d)
                            }),
                            _ => algorithm.compressor(levels[idx]),
                        };
                        (compressor, preserve_etag[idx])
                    }
                    Action::Decompress(algorithm) => {
                        let idx = algorithm.index();
                        (
                            algorithm.decompressor(decompress_enable[idx]),
                            preserve_etag[idx],
                        )
                    }
                };
                if encoder.is_some() {
                    adjust_response_header(resp, &action, preserve_etag);
                }
                self.0 = CtxInner::BodyPhase(encoder);
            }
            CtxInner::BodyPhase(_) => panic!("Wrong phase: BodyPhase"),
        }
    }

    /// Stream the response body chunks into this ctx. The return value will be the compressed
    /// data.
    ///
    /// Return None if compression is not enabled.
    pub fn response_body_filter(&mut self, data: Option<&Bytes>, end: bool) -> Option<Bytes> {
        match &mut self.0 {
            CtxInner::HeaderPhase { .. } => panic!("Wrong phase: HeaderPhase"),
            CtxInner::BodyPhase(compressor) => {
                let result = compressor
                    .as_mut()
                    .map(|c| {
                        // Feed even empty slice to compressor because it might yield data
                        // when `end` is true
                        let data = if let Some(b) = data { b.as_ref() } else { &[] };
                        c.encode(data, end)
                    })
                    .transpose();
                result.unwrap_or_else(|e| {
                    warn!("Failed to compress, compression disabled, {}", e);
                    // no point to transcode further data because bad data is already seen
                    self.0 = CtxInner::BodyPhase(None);
                    None
                })
            }
        }
    }

    // TODO: retire this function, replace it with the two functions above
    /// Feed the response into this ctx.
    /// This filter will mutate the response accordingly if encoding is needed.
    pub fn response_filter(&mut self, t: &mut HttpTask) {
        if !self.is_enabled() {
            return;
        }
        match t {
            HttpTask::Header(resp, end) => self.response_header_filter(resp, *end),
            HttpTask::Body(data, end) => {
                let compressed = self.response_body_filter(data.as_ref(), *end);
                if compressed.is_some() {
                    *t = HttpTask::Body(compressed, *end);
                }
            }
            HttpTask::Done => {
                // try to finish/flush compression
                let compressed = self.response_body_filter(None, true);
                if compressed.is_some() {
                    // compressor has more data to flush
                    *t = HttpTask::Body(compressed, true);
                }
            }
            _ => { /* Trailer, Failed: do nothing? */ }
        }
    }
}

#[derive(Debug, PartialEq, Eq, Clone, Copy, EnumCountMacro)]
pub enum Algorithm {
    Any, // the "*"
    Gzip,
    Brotli,
    Zstd,
    Dcb,
    Dcz,
    // TODO: Identity,
    // TODO: Deflate
    Other, // anything unknown
}

impl Algorithm {
    pub fn as_str(&self) -> &'static str {
        match self {
            Algorithm::Gzip => "gzip",
            Algorithm::Brotli => "br",
            Algorithm::Zstd => "zstd",
            Algorithm::Dcb => "dcb",
            Algorithm::Dcz => "dcz",
            Algorithm::Any => "*",
            Algorithm::Other => "other",
        }
    }

    pub fn compressor(&self, level: u32) -> Option<Box<dyn Encode + Send + Sync>> {
        if level == 0 {
            None
        } else {
            match self {
                Self::Gzip => Some(Box::new(gzip::Compressor::new(level))),
                Self::Brotli => Some(Box::new(brotli::Compressor::new(level))),
                Self::Zstd => Some(Box::new(zstd::Compressor::new(level))),
                _ => None, // not implemented
            }
        }
    }

    pub fn maybe_compressor_with_dictionary(
        &self,
        level: u32,
        dictionary: &DictionaryData,
    ) -> Option<Box<dyn Encode + Send + Sync>> {
        if level == 0 {
            None
        } else {
            match self {
                Self::Dcz => {
                    match zstd::DictionaryCompressor::new(level, &dictionary.bytes, dictionary.hash)
                    {
                        Ok(c) => Some(Box::new(c)),
                        Err(e) => {
                            warn!("Failed to create DCZ compressor: {e}");
                            None
                        }
                    }
                }
                _ => None,
            }
        }
    }

    pub fn decompressor(&self, enabled: bool) -> Option<Box<dyn Encode + Send + Sync>> {
        if !enabled {
            None
        } else {
            match self {
                Self::Gzip => Some(Box::new(gzip::Decompressor::new())),
                Self::Brotli => Some(Box::new(brotli::Decompressor::new())),
                _ => None, // not implemented
            }
        }
    }

    pub fn index(&self) -> usize {
        *self as usize
    }
}

impl From<&str> for Algorithm {
    fn from(s: &str) -> Self {
        use unicase::UniCase;

        let coding = UniCase::new(s);
        if coding == UniCase::ascii("gzip") {
            Algorithm::Gzip
        } else if coding == UniCase::ascii("br") {
            Algorithm::Brotli
        } else if coding == UniCase::ascii("zstd") {
            Algorithm::Zstd
        } else if coding == UniCase::ascii("dcb") {
            Algorithm::Dcb
        } else if coding == UniCase::ascii("dcz") {
            Algorithm::Dcz
        } else if s.is_empty() {
            Algorithm::Any
        } else {
            Algorithm::Other
        }
    }
}

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum Action {
    Noop, // do nothing, e.g. when the input is already gzip
    Compress(Algorithm),
    Decompress(Algorithm),
}

// parse Accept-Encoding header and put it to the list
fn parse_accept_encoding(accept_encoding: Option<&http::HeaderValue>, list: &mut Vec<Algorithm>) {
    // https://www.rfc-editor.org/rfc/rfc9110#name-accept-encoding
    if let Some(ac) = accept_encoding {
        // fast path
        if ac.as_bytes() == b"gzip" {
            list.push(Algorithm::Gzip);
            return;
        }
        // properly parse AC header
        match sfv::Parser::parse_list(ac.as_bytes()) {
            Ok(parsed) => {
                for item in parsed {
                    if let sfv::ListEntry::Item(i) = item {
                        if let Some(s) = i.bare_item.as_token() {
                            // TODO: support q value
                            let algorithm = Algorithm::from(s);
                            // ignore algorithms that we don't understand ignore
                            if algorithm != Algorithm::Other {
                                list.push(Algorithm::from(s));
                            }
                        }
                    }
                }
            }
            Err(e) => {
                warn!("Failed to parse accept-encoding {ac:?}, {e}")
            }
        }
    } else {
        // "If no Accept-Encoding header, any content coding is acceptable"
        // keep the list empty
    }
}

#[test]
fn test_accept_encoding_req_header() {
    let mut header = RequestHeader::build("GET", b"/", None).unwrap();
    let mut ac_list = Vec::new();
    parse_accept_encoding(
        header.headers.get(http::header::ACCEPT_ENCODING),
        &mut ac_list,
    );
    assert!(ac_list.is_empty());

    let mut ac_list = Vec::new();
    header.insert_header("accept-encoding", "gzip").unwrap();
    parse_accept_encoding(
        header.headers.get(http::header::ACCEPT_ENCODING),
        &mut ac_list,
    );
    assert_eq!(ac_list[0], Algorithm::Gzip);

    let mut ac_list = Vec::new();
    header
        .insert_header("accept-encoding", "what, br, gzip")
        .unwrap();
    parse_accept_encoding(
        header.headers.get(http::header::ACCEPT_ENCODING),
        &mut ac_list,
    );
    assert_eq!(ac_list[0], Algorithm::Brotli);
    assert_eq!(ac_list[1], Algorithm::Gzip);
}

// test whether the response depends on Accept-Encoding header
fn depends_on_accept_encoding(
    resp: &ResponseHeader,
    compress_enabled: bool,
    decompress_enabled: &[bool],
) -> bool {
    use http::header::CONTENT_ENCODING;

    (decompress_enabled.iter().any(|enabled| *enabled)
        && resp.headers.get(CONTENT_ENCODING).is_some())
        || (compress_enabled && compressible(resp))
}

#[test]
fn test_decide_on_accept_encoding() {
    let mut resp = ResponseHeader::build(200, None).unwrap();
    resp.insert_header("content-length", "50").unwrap();
    resp.insert_header("content-type", "text/html").unwrap();
    resp.insert_header("content-encoding", "gzip").unwrap();

    // enabled
    assert!(depends_on_accept_encoding(&resp, false, &[true]));

    // decompress disabled => disabled
    assert!(!depends_on_accept_encoding(&resp, false, &[false]));

    // no content-encoding => disabled
    resp.remove_header("content-encoding");
    assert!(!depends_on_accept_encoding(&resp, false, &[true]));

    // compress enabled and compressible response => enabled
    assert!(depends_on_accept_encoding(&resp, true, &[false]));

    // compress disabled and compressible response => disabled
    assert!(!depends_on_accept_encoding(&resp, false, &[false]));

    // compress enabled and not compressible response => disabled
    resp.insert_header("content-type", "text/html+zip").unwrap();
    assert!(!depends_on_accept_encoding(&resp, true, &[false]));
}

// filter response header to see if (de)compression is needed
fn decide_action(resp: &ResponseHeader, accept_encoding: &[Algorithm]) -> Action {
    use http::header::CONTENT_ENCODING;

    let content_encoding = if let Some(ce) = resp.headers.get(CONTENT_ENCODING) {
        // https://www.rfc-editor.org/rfc/rfc9110#name-content-encoding
        if let Ok(ce_str) = std::str::from_utf8(ce.as_bytes()) {
            Some(Algorithm::from(ce_str))
        } else {
            // not utf-8, treat it as unknown encoding to leave it untouched
            Some(Algorithm::Other)
        }
    } else {
        // no Accept-encoding
        None
    };

    if let Some(ce) = content_encoding {
        if accept_encoding.contains(&ce) {
            // downstream can accept this encoding, nothing to do
            Action::Noop
        } else {
            // always decompress because uncompressed is always acceptable
            // https://www.rfc-editor.org/rfc/rfc9110#field.accept-encoding
            // "If the representation has no content coding, then it is acceptable by default
            // unless specifically excluded..." TODO: check the exclude case
            // TODO: we could also transcode it to a preferred encoding, e.g. br->gzip
            Action::Decompress(ce)
        }
    } else if accept_encoding.is_empty() // both CE and AE are empty
        || !compressible(resp) // the type is not compressible
        || accept_encoding[0] == Algorithm::Any
    {
        Action::Noop
    } else {
        // try to compress with the first AC
        // TODO: support to configure preferred encoding
        Action::Compress(accept_encoding[0])
    }
}

#[test]
fn test_decide_action() {
    use Action::*;
    use Algorithm::*;

    let header = ResponseHeader::build(200, None).unwrap();
    // no compression asked, no compression needed
    assert_eq!(decide_action(&header, &[]), Noop);

    // already gzip, no compression needed
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-type", "text/html").unwrap();
    header.insert_header("content-encoding", "gzip").unwrap();
    assert_eq!(decide_action(&header, &[Gzip]), Noop);

    // already gzip, no compression needed, upper case
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "GzIp").unwrap();
    header.insert_header("content-type", "text/html").unwrap();
    assert_eq!(decide_action(&header, &[Gzip]), Noop);

    // no encoding, compression needed, accepted content-type, large enough
    // Will compress
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-length", "20").unwrap();
    header.insert_header("content-type", "text/html").unwrap();
    assert_eq!(decide_action(&header, &[Gzip]), Compress(Gzip));

    // too small
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-length", "19").unwrap();
    header.insert_header("content-type", "text/html").unwrap();
    assert_eq!(decide_action(&header, &[Gzip]), Noop);

    // already compressed MIME
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-length", "20").unwrap();
    header
        .insert_header("content-type", "text/html+zip")
        .unwrap();
    assert_eq!(decide_action(&header, &[Gzip]), Noop);

    // unsupported MIME
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-length", "20").unwrap();
    header.insert_header("content-type", "image/jpg").unwrap();
    assert_eq!(decide_action(&header, &[Gzip]), Noop);

    // compressed, need decompress
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "gzip").unwrap();
    assert_eq!(decide_action(&header, &[]), Decompress(Gzip));

    // accept-encoding different, need decompress
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "gzip").unwrap();
    assert_eq!(decide_action(&header, &[Brotli]), Decompress(Gzip));

    // less preferred but no need to decompress
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "gzip").unwrap();
    assert_eq!(decide_action(&header, &[Brotli, Gzip]), Noop);

    // dcb passthrough: client accepts dcb, response has dcb
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "dcb").unwrap();
    assert_eq!(decide_action(&header, &[Dcb, Brotli]), Noop);

    // dcz passthrough: client accepts dcz, response has dcz
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "dcz").unwrap();
    assert_eq!(decide_action(&header, &[Dcz, Zstd]), Noop);

    // Client wants dcz but response has brotli, decompress brotli
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "br").unwrap();
    assert_eq!(decide_action(&header, &[Dcz]), Decompress(Brotli));

    // Client wants dcz but response has zstd, decompress zstd
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "zstd").unwrap();
    assert_eq!(decide_action(&header, &[Dcz]), Decompress(Zstd));

    // Client wants dcb but response has gzip, decompress gzip
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "gzip").unwrap();
    assert_eq!(decide_action(&header, &[Dcb]), Decompress(Gzip));

    // Client wants dcb but response has brotli, decompress brotli
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-encoding", "br").unwrap();
    assert_eq!(decide_action(&header, &[Dcb]), Decompress(Brotli));
}

use once_cell::sync::Lazy;
use regex::Regex;

// Allow text, application, font, a few image/ MIME types and binary/octet-stream
// TODO: fine tune this list
static MIME_CHECK: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"^(?:text/|application/|font/|image/(?:x-icon|svg\+xml|nd\.microsoft\.icon)|binary/octet-stream)")
        .unwrap()
});

// check if the response mime type is compressible
fn compressible(resp: &ResponseHeader) -> bool {
    // arbitrary size limit, things to consider
    // 1. too short body may have little redundancy to compress
    // 2. gzip header and footer overhead
    // 3. latency is the same as long as data fits in a TCP congestion window regardless of size
    const MIN_COMPRESS_LEN: usize = 20;

    // check if response is too small to compress
    if let Some(cl) = resp.headers.get(http::header::CONTENT_LENGTH) {
        if let Some(cl_num) = std::str::from_utf8(cl.as_bytes())
            .ok()
            .and_then(|v| v.parse::<usize>().ok())
        {
            if cl_num < MIN_COMPRESS_LEN {
                return false;
            }
        }
    }
    // no Content-Length or large enough, check content-type next
    if let Some(ct) = resp.headers.get(http::header::CONTENT_TYPE) {
        if let Ok(ct_str) = std::str::from_utf8(ct.as_bytes()) {
            if ct_str.contains("zip") {
                // heuristic: don't compress mime type that has zip in it
                false
            } else {
                // check if mime type in allow list
                MIME_CHECK.find(ct_str).is_some()
            }
        } else {
            false // invalid CT header, don't compress
        }
    } else {
        false // don't compress empty content-type
    }
}

// add Vary header with the specified value or extend an existing Vary header value
fn add_vary_header(resp: &mut ResponseHeader, value: &http::header::HeaderName) {
    use http::header::{HeaderValue, VARY};

    let already_present = resp.headers.get_all(VARY).iter().any(|existing| {
        existing
            .as_bytes()
            .split(|b| *b == b',')
            .map(|mut v| {
                // This is equivalent to slice.trim_ascii() which is unstable
                while let [first, rest @ ..] = v {
                    if first.is_ascii_whitespace() {
                        v = rest;
                    } else {
                        break;
                    }
                }
                while let [rest @ .., last] = v {
                    if last.is_ascii_whitespace() {
                        v = rest;
                    } else {
                        break;
                    }
                }
                v
            })
            .any(|v| v == b"*" || v.eq_ignore_ascii_case(value.as_ref()))
    });

    if !already_present {
        resp.append_header(&VARY, HeaderValue::from_name(value.clone()))
            .unwrap();
    }
}

#[test]
fn test_add_vary_header() {
    let mut header = ResponseHeader::build(200, None).unwrap();
    add_vary_header(&mut header, &http::header::ACCEPT_ENCODING);
    assert_eq!(
        header
            .headers
            .get_all("Vary")
            .into_iter()
            .collect::<Vec<_>>(),
        vec!["accept-encoding"]
    );

    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("Vary", "Accept-Language").unwrap();
    add_vary_header(&mut header, &http::header::ACCEPT_ENCODING);
    assert_eq!(
        header
            .headers
            .get_all("Vary")
            .into_iter()
            .collect::<Vec<_>>(),
        vec!["Accept-Language", "accept-encoding"]
    );

    let mut header = ResponseHeader::build(200, None).unwrap();
    header
        .insert_header("Vary", "Accept-Language, Accept-Encoding")
        .unwrap();
    add_vary_header(&mut header, &http::header::ACCEPT_ENCODING);
    assert_eq!(
        header
            .headers
            .get_all("Vary")
            .into_iter()
            .collect::<Vec<_>>(),
        vec!["Accept-Language, Accept-Encoding"]
    );

    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("Vary", "*").unwrap();
    add_vary_header(&mut header, &http::header::ACCEPT_ENCODING);
    assert_eq!(
        header
            .headers
            .get_all("Vary")
            .into_iter()
            .collect::<Vec<_>>(),
        vec!["*"]
    );
}

fn adjust_response_header(resp: &mut ResponseHeader, action: &Action, preserve_etag: bool) {
    use http::header::{
        HeaderValue, ACCEPT_RANGES, CONTENT_ENCODING, CONTENT_LENGTH, ETAG, TRANSFER_ENCODING,
    };

    fn set_stream_headers(resp: &mut ResponseHeader) {
        // because the transcoding is streamed, content length is not known ahead
        resp.remove_header(&CONTENT_LENGTH);
        // remove Accept-Ranges header because range requests will no longer work
        resp.remove_header(&ACCEPT_RANGES);

        // we stream body now TODO: chunked is for h1 only
        resp.insert_header(&TRANSFER_ENCODING, HeaderValue::from_static("chunked"))
            .unwrap();
    }

    fn weaken_or_clear_etag(resp: &mut ResponseHeader) {
        // RFC9110: https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.1
        // "a validator is weak if it is shared by two or more representations
        // of a given resource at the same time, unless those representations
        // have identical representation data"
        // Follow nginx gzip filter's example when changing content encoding:
        // - if the ETag is not a valid strong ETag, clear it (i.e. does not start with `"`)
        // - else, weaken it
        if let Some(etag) = resp.headers.get(&ETAG) {
            let etag_bytes = etag.as_bytes();
            if etag_bytes.starts_with(b"W/") {
                // this is already a weak ETag, noop
            } else if etag_bytes.starts_with(b"\"") {
                // strong ETag, weaken since we are changing the byte representation
                let weakened_etag = HeaderValue::from_bytes(&[b"W/", etag_bytes].concat())
                    .expect("valid header value prefixed with \"W/\" should remain valid");
                resp.insert_header(&ETAG, weakened_etag)
                    .expect("can insert weakened etag when etag was already valid");
            } else {
                // invalid strong ETag, just clear it
                // https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.3
                // says the opaque-tag section needs to be a quoted string
                resp.remove_header(&ETAG);
            }
        }
    }

    match action {
        Action::Noop => { /* do nothing */ }
        Action::Decompress(_) => {
            resp.remove_header(&CONTENT_ENCODING);
            set_stream_headers(resp);
            if !preserve_etag {
                weaken_or_clear_etag(resp);
            }
        }
        Action::Compress(a) => {
            resp.insert_header(&CONTENT_ENCODING, HeaderValue::from_static(a.as_str()))
                .unwrap();
            set_stream_headers(resp);
            if !preserve_etag {
                weaken_or_clear_etag(resp);
            }
        }
    }
}

#[test]
fn test_adjust_response_header() {
    use Action::*;
    use Algorithm::*;

    // noop
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-length", "20").unwrap();
    header.insert_header("content-encoding", "gzip").unwrap();
    header.insert_header("accept-ranges", "bytes").unwrap();
    header.insert_header("etag", "\"abc123\"").unwrap();
    adjust_response_header(&mut header, &Noop, false);
    assert_eq!(
        header.headers.get("content-encoding").unwrap().as_bytes(),
        b"gzip"
    );
    assert_eq!(
        header.headers.get("content-length").unwrap().as_bytes(),
        b"20"
    );
    assert_eq!(
        header.headers.get("etag").unwrap().as_bytes(),
        b"\"abc123\""
    );
    assert!(header.headers.get("transfer-encoding").is_none());

    // decompress gzip
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-length", "20").unwrap();
    header.insert_header("content-encoding", "gzip").unwrap();
    header.insert_header("accept-ranges", "bytes").unwrap();
    header.insert_header("etag", "\"abc123\"").unwrap();
    adjust_response_header(&mut header, &Decompress(Gzip), false);
    assert!(header.headers.get("content-encoding").is_none());
    assert!(header.headers.get("content-length").is_none());
    assert_eq!(
        header.headers.get("transfer-encoding").unwrap().as_bytes(),
        b"chunked"
    );
    assert!(header.headers.get("accept-ranges").is_none());
    assert_eq!(
        header.headers.get("etag").unwrap().as_bytes(),
        b"W/\"abc123\""
    );
    // when preserve_etag on, strong etag is kept
    header.insert_header("etag", "\"abc123\"").unwrap();
    adjust_response_header(&mut header, &Decompress(Gzip), true);
    assert_eq!(
        header.headers.get("etag").unwrap().as_bytes(),
        b"\"abc123\""
    );

    // compress
    let mut header = ResponseHeader::build(200, None).unwrap();
    header.insert_header("content-length", "20").unwrap();
    header.insert_header("accept-ranges", "bytes").unwrap();
    // try invalid etag, should be cleared
    header.insert_header("etag", "abc123").unwrap();
    adjust_response_header(&mut header, &Compress(Gzip), false);
    assert_eq!(
        header.headers.get("content-encoding").unwrap().as_bytes(),
        b"gzip"
    );
    assert!(header.headers.get("content-length").is_none());
    assert!(header.headers.get("accept-ranges").is_none());
    assert_eq!(
        header.headers.get("transfer-encoding").unwrap().as_bytes(),
        b"chunked"
    );
    assert!(header.headers.get("etag").is_none());
    // when preserve_etag on, etag is kept
    header.insert_header("etag", "abc123").unwrap();
    adjust_response_header(&mut header, &Compress(Gzip), true);
    assert_eq!(header.headers.get("etag").unwrap().as_bytes(), b"abc123");
}

#[cfg(test)]
mod tests_dictionary_compression {
    use super::*;

    const TEST_DICTIONARY: &[u8] = b"The quick brown fox jumps over the lazy dog. \
        Common HTTP headers: Content-Type, Accept-Encoding, Cache-Control. \
        JSON patterns: {\"key\": \"value\"}, [\"array\", \"items\"].";

    fn test_dictionary_hash() -> [u8; 32] {
        let mut hash = [0u8; 32];
        for (i, byte) in TEST_DICTIONARY.iter().take(32).enumerate() {
            hash[i] = *byte;
        }
        hash
    }

    #[test]
    fn set_and_clear_dictionary() {
        let mut ctx = ResponseCompressionCtx::new(3, false, false);
        assert!(!ctx.has_dictionary());

        ctx.set_dictionary(Bytes::from_static(TEST_DICTIONARY), test_dictionary_hash());
        assert!(ctx.has_dictionary());

        ctx.clear_dictionary();
        assert!(!ctx.has_dictionary());
    }

    #[test]
    fn dcz_compression_with_dictionary() {
        let mut ctx = ResponseCompressionCtx::new(3, false, false);
        let hash = test_dictionary_hash();
        ctx.set_dictionary(Bytes::from_static(TEST_DICTIONARY), hash);

        let mut req = RequestHeader::build("GET", b"/test.js", None).unwrap();
        req.insert_header("accept-encoding", "dcz, br, gzip")
            .unwrap();
        ctx.request_filter(&req);

        let mut resp = ResponseHeader::build(200, None).unwrap();
        resp.insert_header("content-type", "application/javascript")
            .unwrap();
        resp.insert_header("content-length", "1000").unwrap();
        ctx.response_header_filter(&mut resp, false);

        assert_eq!(
            resp.headers.get("content-encoding").unwrap().as_bytes(),
            b"dcz"
        );

        let input = Bytes::from_static(b"The quick brown fox jumps over the lazy dog again.");
        let compressed = ctx.response_body_filter(Some(&input), true).unwrap();

        assert!(compressed.len() >= 40);
        assert_eq!(&compressed[..8], &zstd::DCZ_MAGIC);
        assert_eq!(&compressed[8..40], &hash);
    }

    #[test]
    fn dcz_without_dictionary_no_compression() {
        let mut ctx = ResponseCompressionCtx::new(3, false, false);

        let mut req = RequestHeader::build("GET", b"/test.js", None).unwrap();
        req.insert_header("accept-encoding", "dcz").unwrap();
        ctx.request_filter(&req);

        let mut resp = ResponseHeader::build(200, None).unwrap();
        resp.insert_header("content-type", "application/javascript")
            .unwrap();
        resp.insert_header("content-length", "1000").unwrap();
        ctx.response_header_filter(&mut resp, false);

        // no dictionary set, no compression applied
        assert!(resp.headers.get("content-encoding").is_none());
    }

    #[test]
    fn dcz_no_fallback_without_dictionary() {
        let mut ctx = ResponseCompressionCtx::new(3, false, false);

        let mut req = RequestHeader::build("GET", b"/test.js", None).unwrap();
        req.insert_header("accept-encoding", "dcz, br, gzip")
            .unwrap();
        ctx.request_filter(&req);

        let mut resp = ResponseHeader::build(200, None).unwrap();
        resp.insert_header("content-type", "application/javascript")
            .unwrap();
        resp.insert_header("content-length", "1000").unwrap();
        ctx.response_header_filter(&mut resp, false);

        // dcz first but no dictionary, no automatic fallback
        assert!(resp.headers.get("content-encoding").is_none());
    }

    #[test]
    fn maybe_compressor_with_dictionary_dcz_only() {
        let dict_data = DictionaryData {
            bytes: Bytes::from_static(TEST_DICTIONARY),
            hash: test_dictionary_hash(),
        };

        // only Dcz returns a compressor
        assert!(Algorithm::Dcz
            .maybe_compressor_with_dictionary(3, &dict_data)
            .is_some());
        assert!(Algorithm::Gzip
            .maybe_compressor_with_dictionary(3, &dict_data)
            .is_none());
        assert!(Algorithm::Brotli
            .maybe_compressor_with_dictionary(3, &dict_data)
            .is_none());
        assert!(Algorithm::Zstd
            .maybe_compressor_with_dictionary(3, &dict_data)
            .is_none());
        // level 0 disables
        assert!(Algorithm::Dcz
            .maybe_compressor_with_dictionary(0, &dict_data)
            .is_none());
    }

    #[test]
    fn dcz_full_flow() {
        let mut ctx = ResponseCompressionCtx::new(3, false, false);
        let hash = test_dictionary_hash();
        ctx.set_dictionary(Bytes::from_static(TEST_DICTIONARY), hash);

        let mut req = RequestHeader::build("GET", b"/app.js", None).unwrap();
        req.insert_header("accept-encoding", "dcz").unwrap();
        ctx.request_filter(&req);

        let mut resp = ResponseHeader::build(200, None).unwrap();
        resp.insert_header("content-type", "application/javascript")
            .unwrap();
        resp.insert_header("content-length", "500").unwrap();
        ctx.response_header_filter(&mut resp, false);

        assert_eq!(
            resp.headers.get("content-encoding").unwrap().as_bytes(),
            b"dcz"
        );
        assert!(resp.headers.get("content-length").is_none());
        assert_eq!(
            resp.headers.get("transfer-encoding").unwrap().as_bytes(),
            b"chunked"
        );

        let chunk1 = Bytes::from_static(b"First chunk. ");
        let output1 = ctx.response_body_filter(Some(&chunk1), false);
        assert!(output1.is_some());

        let chunk2 = Bytes::from_static(b"Second chunk.");
        let output2 = ctx.response_body_filter(Some(&chunk2), true);
        assert!(output2.is_some());

        let (name, total_in, total_out, _) = ctx.get_info().unwrap();
        assert_eq!(name, "dcz");
        assert_eq!(total_in, chunk1.len() + chunk2.len());
        assert!(total_out > 0);
    }
}


================================================
FILE: pingora-core/src/protocols/http/compression/zstd.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::{Encode, COMPRESSION_ERROR};
use bytes::Bytes;
use parking_lot::Mutex;
use pingora_error::{OrErr, Result};
use std::io::Write;
use std::time::{Duration, Instant};
use zstd::stream::write::Encoder;

/// [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842) magic number for dcz.
pub const DCZ_MAGIC: [u8; 8] = [0x5e, 0x2a, 0x4d, 0x18, 0x20, 0x00, 0x00, 0x00];

/// [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842) header size: 8-byte magic + 32-byte SHA-256 hash.
pub const DCZ_HEADER_SIZE: usize = 40;

pub struct Compressor {
    compress: Mutex<Encoder<'static, Vec<u8>>>,
    total_in: usize,
    total_out: usize,
    duration: Duration,
}

impl Compressor {
    pub fn new(level: u32) -> Self {
        Compressor {
            // Mutex because Encoder is not Sync
            // https://github.com/gyscos/zstd-rs/issues/186
            compress: Mutex::new(Encoder::new(vec![], level as i32).unwrap()),
            total_in: 0,
            total_out: 0,
            duration: Duration::new(0, 0),
        }
    }
}

impl Encode for Compressor {
    fn encode(&mut self, input: &[u8], end: bool) -> Result<Bytes> {
        // reserve at most 16k
        const MAX_INIT_COMPRESSED_BUF_SIZE: usize = 16 * 1024;
        let start = Instant::now();
        self.total_in += input.len();
        let mut compress = self.compress.lock();
        // reserve at most input size, cap at 16k, compressed output should be smaller
        compress
            .get_mut()
            .reserve(std::cmp::min(MAX_INIT_COMPRESSED_BUF_SIZE, input.len()));
        compress
            .write_all(input)
            .or_err(COMPRESSION_ERROR, "while compress zstd")?;
        // write to vec will never fail.
        if end {
            compress
                .do_finish()
                .or_err(COMPRESSION_ERROR, "while compress zstd")?;
        }
        self.total_out += compress.get_ref().len();
        self.duration += start.elapsed();
        Ok(std::mem::take(compress.get_mut()).into()) // into() Bytes will drop excess capacity
    }

    fn stat(&self) -> (&'static str, usize, usize, Duration) {
        ("zstd", self.total_in, self.total_out, self.duration)
    }
}

/// Dictionary compressor for [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842) (dcz).
/// Prepends [`DCZ_HEADER_SIZE`]-byte header to output.
pub struct DictionaryCompressor {
    compress: Mutex<Encoder<'static, Vec<u8>>>,
    dictionary_hash: [u8; 32],
    header_written: bool,
    total_in: usize,
    total_out: usize,
    duration: Duration,
}

impl DictionaryCompressor {
    pub fn new(level: u32, dictionary: &[u8], dictionary_hash: [u8; 32]) -> Result<Self> {
        let encoder = Encoder::with_dictionary(vec![], level as i32, dictionary).or_err(
            COMPRESSION_ERROR,
            "failed to create zstd encoder with dictionary",
        )?;

        Ok(DictionaryCompressor {
            compress: Mutex::new(encoder),
            dictionary_hash,
            header_written: false,
            total_in: 0,
            total_out: 0,
            duration: Duration::new(0, 0),
        })
    }

    fn build_header(&self) -> [u8; DCZ_HEADER_SIZE] {
        let mut header = [0u8; DCZ_HEADER_SIZE];
        header[..8].copy_from_slice(&DCZ_MAGIC);
        header[8..].copy_from_slice(&self.dictionary_hash);
        header
    }
}

impl Encode for DictionaryCompressor {
    fn encode(&mut self, input: &[u8], end: bool) -> Result<Bytes> {
        const MAX_INIT_COMPRESSED_BUF_SIZE: usize = 16 * 1024;
        let start = Instant::now();
        self.total_in += input.len();
        let mut compress = self.compress.lock();

        let reserve_size = if !self.header_written {
            DCZ_HEADER_SIZE + std::cmp::min(MAX_INIT_COMPRESSED_BUF_SIZE, input.len())
        } else {
            std::cmp::min(MAX_INIT_COMPRESSED_BUF_SIZE, input.len())
        };
        compress.get_mut().reserve(reserve_size);

        if !self.header_written {
            compress.get_mut().extend_from_slice(&self.build_header());
            self.header_written = true;
        }

        compress
            .write_all(input)
            .or_err(COMPRESSION_ERROR, "while compress dcz")?;
        if end {
            compress
                .do_finish()
                .or_err(COMPRESSION_ERROR, "while compress dcz")?;
        }
        self.total_out += compress.get_ref().len();
        self.duration += start.elapsed();
        Ok(std::mem::take(compress.get_mut()).into())
    }

    fn stat(&self) -> (&'static str, usize, usize, Duration) {
        ("dcz", self.total_in, self.total_out, self.duration)
    }
}

#[cfg(test)]
mod tests_stream {
    use super::*;

    #[test]
    fn compress_zstd_data() {
        let mut compressor = Compressor::new(11);
        let input = b"adcdefgabcdefghadcdefgabcdefghadcdefgabcdefghadcdefgabcdefgh\n";
        let compressed = compressor.encode(&input[..], false).unwrap();
        // waiting for more data
        assert!(compressed.is_empty());

        let compressed = compressor.encode(&input[..], true).unwrap();

        // the zstd Magic_Number
        assert_eq!(&compressed[..4], &[0x28, 0xB5, 0x2F, 0xFD]);
        assert!(compressed.len() < input.len());
    }
}

#[cfg(test)]
mod tests_dictionary {
    use super::*;

    const TEST_DICTIONARY: &[u8] = b"The quick brown fox jumps over the lazy dog. \
        This is a test dictionary with common words and patterns that might appear \
        in the content being compressed. HTTP headers, JSON structures, HTML tags.";

    // This is not a real SHA-256 hash as specified in
    // [RFC 9842](https://datatracker.ietf.org/doc/html/rfc9842).
    // The compression module treats the dictionary hash as opaque bytes, so any
    // 32-byte value is sufficient to test that the hash is correctly written
    // into the DCZ header.
    fn test_dictionary_hash() -> [u8; 32] {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        TEST_DICTIONARY.hash(&mut hasher);
        let hash = hasher.finish();

        let mut result = [0u8; 32];
        result[..8].copy_from_slice(&hash.to_le_bytes());
        result[8..16].copy_from_slice(&hash.to_be_bytes());
        for (i, byte) in result[16..32].iter_mut().enumerate() {
            *byte = ((i + 16) as u8).wrapping_mul(hash as u8);
        }
        result
    }

    #[test]
    fn compress_dcz_prepends_header() {
        let hash = test_dictionary_hash();
        let mut compressor = DictionaryCompressor::new(3, TEST_DICTIONARY, hash).unwrap();

        let input = b"The quick brown fox jumps over the lazy dog again.";
        let compressed = compressor.encode(input, true).unwrap();

        assert!(compressed.len() >= DCZ_HEADER_SIZE);
        // RFC 9842 magic
        assert_eq!(&compressed[..8], &DCZ_MAGIC);
        // dictionary hash
        assert_eq!(&compressed[8..40], &hash);
        // zstd magic follows
        assert_eq!(&compressed[40..44], &[0x28, 0xB5, 0x2F, 0xFD]);
    }

    #[test]
    fn compress_dcz_header_written_once() {
        let hash = test_dictionary_hash();
        let mut compressor = DictionaryCompressor::new(3, TEST_DICTIONARY, hash).unwrap();

        let chunk1 = compressor.encode(b"First chunk of data. ", false).unwrap();
        assert!(chunk1.len() >= DCZ_HEADER_SIZE);
        assert_eq!(&chunk1[..8], &DCZ_MAGIC);

        let chunk2 = compressor.encode(b"Second chunk of data.", true).unwrap();
        if chunk2.len() >= 8 {
            assert_ne!(&chunk2[..8], &DCZ_MAGIC);
        }
    }

    #[test]
    fn compress_dcz_stats() {
        let hash = test_dictionary_hash();
        let mut compressor = DictionaryCompressor::new(3, TEST_DICTIONARY, hash).unwrap();

        let input = b"Some test data to compress with the dictionary.";
        let _ = compressor.encode(input, true).unwrap();

        let (name, total_in, total_out, duration) = compressor.stat();
        assert_eq!(name, "dcz");
        assert_eq!(total_in, input.len());
        assert!(total_out >= DCZ_HEADER_SIZE);
        assert!(duration.as_nanos() > 0);
    }

    #[test]
    fn compress_dcz_empty_input() {
        let hash = test_dictionary_hash();
        let mut compressor = DictionaryCompressor::new(3, TEST_DICTIONARY, hash).unwrap();

        let compressed = compressor.encode(b"", true).unwrap();
        assert!(compressed.len() >= DCZ_HEADER_SIZE);
        assert_eq!(&compressed[..8], &DCZ_MAGIC);
    }

    #[test]
    fn compress_dcz_streaming() {
        let hash = test_dictionary_hash();
        let mut compressor = DictionaryCompressor::new(3, TEST_DICTIONARY, hash).unwrap();

        let chunks: &[&[u8]] = &[b"First part. ", b"Second part. ", b"Final part."];

        let mut all_output = Vec::new();
        for (i, chunk) in chunks.iter().enumerate() {
            let output = compressor.encode(chunk, i == chunks.len() - 1).unwrap();
            all_output.extend_from_slice(&output);
        }

        assert!(all_output.len() >= DCZ_HEADER_SIZE);
        assert_eq!(&all_output[..8], &DCZ_MAGIC);

        let (_, total_in, _, _) = compressor.stat();
        let expected_in: usize = chunks.iter().map(|c| c.len()).sum();
        assert_eq!(total_in, expected_in);
    }

    #[test]
    fn compress_dcz_achieves_compression() {
        let hash = test_dictionary_hash();
        let mut compressor = DictionaryCompressor::new(3, TEST_DICTIONARY, hash).unwrap();

        let input = b"The quick brown fox jumps over the lazy dog. \
            The quick brown fox jumps over the lazy dog. \
            The quick brown fox jumps over the lazy dog.";

        let compressed = compressor.encode(input, true).unwrap();
        let compressed_data_size = compressed.len() - DCZ_HEADER_SIZE;
        assert!(compressed_data_size < input.len());
    }

    #[test]
    fn compress_dcz_roundtrip() {
        let hash = test_dictionary_hash();
        let mut compressor = DictionaryCompressor::new(3, TEST_DICTIONARY, hash).unwrap();

        let input = b"The quick brown fox jumps over the lazy dog. \
            HTTP headers, JSON structures, HTML tags. \
            Common patterns that appear in web content.";
        let compressed = compressor.encode(input, true).unwrap();

        // Verify DCZ header is present then strip it
        assert!(compressed.len() >= DCZ_HEADER_SIZE);
        assert_eq!(&compressed[..8], &DCZ_MAGIC);
        let zstd_data = &compressed[DCZ_HEADER_SIZE..];

        // Decompress with the same dictionary and verify roundtrip
        let mut decoder =
            zstd::stream::read::Decoder::with_dictionary(zstd_data, TEST_DICTIONARY).unwrap();
        let mut decompressed = Vec::new();
        std::io::Read::read_to_end(&mut decoder, &mut decompressed).unwrap();

        assert_eq!(decompressed, input);
    }
}


================================================
FILE: pingora-core/src/protocols/http/conditional_filter.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Conditional filter (not modified) utilities

use http::{header::*, StatusCode};
use httpdate::{parse_http_date, HttpDate};
use pingora_error::{ErrorType::InvalidHTTPHeader, OrErr, Result};
use pingora_http::{RequestHeader, ResponseHeader};

/// Evaluates conditional headers according to the [RFC](https://datatracker.ietf.org/doc/html/rfc9111#name-handling-a-received-validat).
///
/// Returns true if the request should receive 304 Not Modified.
pub fn not_modified_filter(req: &RequestHeader, resp: &ResponseHeader) -> bool {
    // https://datatracker.ietf.org/doc/html/rfc9110#name-304-not-modified
    // 304 can only validate 200
    if resp.status != StatusCode::OK {
        return false;
    }

    // Evulation of conditional headers, based on RFC:
    // https://datatracker.ietf.org/doc/html/rfc9111#name-handling-a-received-validat

    // TODO: If-Match and If-Unmodified-Since, and returning 412 Precondition Failed
    // Note that this function is currently used only for proxy cache,
    // and the current RFCs have some conflicting opinions as to whether
    // If-Match and If-Unmodified-Since can be used. https://github.com/httpwg/http-core/issues/1111

    // Conditional request precedence:
    // https://datatracker.ietf.org/doc/html/rfc9110#name-precedence-of-preconditions
    // If-None-Match should be handled before If-Modified-Since.
    // XXX: In nginx, IMS is actually checked first, which may cause compatibility issues
    // for certain origins/clients.

    if req.headers.contains_key(IF_NONE_MATCH) {
        if let Some(etag) = resp.headers.get(ETAG) {
            for inm in req.headers.get_all(IF_NONE_MATCH) {
                if weak_validate_etag(inm.as_bytes(), etag.as_bytes()) {
                    return true;
                }
            }
        }
        // https://datatracker.ietf.org/doc/html/rfc9110#field.if-modified-since
        // "MUST ignore If-Modified-Since if the request contains an If-None-Match header"
        return false;
    }

    // GET/HEAD only https://datatracker.ietf.org/doc/html/rfc9110#field.if-modified-since
    if matches!(req.method, http::Method::GET | http::Method::HEAD) {
        if let Ok(Some(if_modified_since)) = req_header_as_http_date(req, &IF_MODIFIED_SINCE) {
            if let Ok(Some(last_modified)) = resp_header_as_http_date(resp, &LAST_MODIFIED) {
                if if_modified_since >= last_modified {
                    return true;
                }
            }
        }
    }
    false
}

// Trim ASCII whitespace bytes from the start of the slice.
// This is pretty much copied from the nightly API.
// TODO: use `trim_ascii_start` when it stabilizes https://doc.rust-lang.org/std/primitive.slice.html#method.trim_ascii_start
fn trim_ascii_start(mut bytes: &[u8]) -> &[u8] {
    while let [first, rest @ ..] = bytes {
        if first.is_ascii_whitespace() {
            bytes = rest;
        } else {
            break;
        }
    }
    bytes
}

/// Search for an ETag matching `target_etag` from the input header, using
/// [weak comparison](https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.3.2).
/// Multiple ETags can exist in the header as a comma-separated list.
///
/// Returns true if a matching ETag exists.
pub fn weak_validate_etag(input_etag_header: &[u8], target_etag: &[u8]) -> bool {
    // ETag comparison: https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.3.2
    fn strip_weak_prefix(etag: &[u8]) -> &[u8] {
        // Weak ETags are prefaced with `W/`
        etag.strip_prefix(b"W/").unwrap_or(etag)
    }
    // https://datatracker.ietf.org/doc/html/rfc9110#section-13.1.2 unsafe method only
    if input_etag_header == b"*" {
        return true;
    }

    // The RFC defines ETags here: https://datatracker.ietf.org/doc/html/rfc9110#section-8.8.3
    // The RFC requires ETags to be wrapped in double quotes, though some legacy origins or clients
    // don't adhere to this.
    // Unfortunately by allowing non-quoted etags, parsing becomes a little more complicated.
    //
    // This implementation uses nginx's algorithm for parsing ETags, which can handle both quoted
    // and non-quoted ETags. It essentially does a substring comparison at each comma divider,
    // searching for an exact match of the ETag (optional double quotes included) followed by
    // either EOF or another comma.
    //
    // Clients and upstreams should still ideally adhere to quoted ETags to disambiguate
    // situations where commas are contained within the ETag (allowed by the RFC).
    // XXX: This nginx algorithm will handle matching against ETags with commas correctly, but only
    // if the target ETag is a quoted RFC-compliant ETag.
    //
    // For example, consider an if-none-match header: `"xyzzy,xyz,x,y", "xyzzy"`.
    // If the target ETag is double quoted as mandated by the RFC like `"xyz,x"`, this algorithm
    // will correctly report no matching ETag.
    // But if the target ETag is not double quoted like `xyz,x`, it will "incorrectly" match
    // against the substring after the first comma inside the first quoted ETag.

    // Search for the target at each comma delimiter
    let target_etag = strip_weak_prefix(target_etag);
    let mut remaining = strip_weak_prefix(input_etag_header);
    while let Some(search_slice) = remaining.get(0..target_etag.len()) {
        if search_slice == target_etag {
            remaining = &remaining[target_etag.len()..];
            // check if there's any content after the matched substring
            // skip any whitespace
            remaining = trim_ascii_start(remaining);
            if matches!(remaining.first(), None | Some(b',')) {
                // we are either at the end of the header, or at a comma delimiter
                // which means this is a match
                return true;
            }
        }
        // find the next delimiter (ignore any remaining part of the non-matching etag)
        let Some(next_delimiter_pos) = remaining.iter().position(|&b| b == b',') else {
            break;
        };
        remaining = &remaining[next_delimiter_pos..];
        // find the next etag slice to compare
        // ignore extraneous delimiters and whitespace
        let Some(next_etag_pos) = remaining
            .iter()
            .position(|&b| !b.is_ascii_whitespace() && b != b',')
        else {
            break;
        };
        remaining = &remaining[next_etag_pos..];

        remaining = strip_weak_prefix(remaining);
    }
    // remaining length < target etag length
    false
}

/// Utility function to parse an HTTP request header as an [HTTP-date](https://datatracker.ietf.org/doc/html/rfc9110#name-date-time-formats).
pub fn req_header_as_http_date<H>(req: &RequestHeader, header_name: H) -> Result<Option<HttpDate>>
where
    H: AsHeaderName,
{
    let Some(header_value) = req.headers.get(header_name) else {
        return Ok(None);
    };
    Ok(Some(parse_bytes_as_http_date(header_value.as_bytes())?))
}

/// Utility function to parse an HTTP response header as an [HTTP-date](https://datatracker.ietf.org/doc/html/rfc9110#name-date-time-formats).
pub fn resp_header_as_http_date<H>(
    resp: &ResponseHeader,
    header_name: H,
) -> Result<Option<HttpDate>>
where
    H: AsHeaderName,
{
    let Some(header_value) = resp.headers.get(header_name) else {
        return Ok(None);
    };
    Ok(Some(parse_bytes_as_http_date(header_value.as_bytes())?))
}

fn parse_bytes_as_http_date(bytes: &[u8]) -> Result<HttpDate> {
    let input_time = std::str::from_utf8(bytes).explain_err(InvalidHTTPHeader, |_| {
        "HTTP date has unsupported characters (bytes outside of UTF-8)"
    })?;
    Ok(parse_http_date(input_time)
        .or_err(InvalidHTTPHeader, "Invalid HTTP date")?
        .into())
}

/// Utility function to convert the input response header to a 304 Not Modified response.
pub fn to_304(resp: &mut ResponseHeader) {
    // https://datatracker.ietf.org/doc/html/rfc9110#name-304-not-modified
    // XXX: https://datatracker.ietf.org/doc/html/rfc9110#name-content-length
    // "A server may send content-length in 304", but no common web server does it
    // So we drop both content-length and content-type for consistency/less surprise
    resp.set_status(StatusCode::NOT_MODIFIED).unwrap();
    resp.remove_header(&CONTENT_LENGTH);
    resp.remove_header(&CONTENT_TYPE);
    // https://datatracker.ietf.org/doc/html/rfc9110#section-15.4.5-4
    // "SHOULD NOT generate representation metadata other than the above listed fields
    // unless said metadata exists for the purpose of guiding cache updates"
    // Remove some more representation metadata headers
    resp.remove_header(&TRANSFER_ENCODING);
    // note that the following are also stripped by nginx
    resp.remove_header(&CONTENT_ENCODING);
    resp.remove_header(&ACCEPT_RANGES);
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_if_modified_since() {
        fn build_req(if_modified_since: &[u8]) -> RequestHeader {
            let mut req = RequestHeader::build("GET", b"/", None).unwrap();
            req.insert_header("If-Modified-Since", if_modified_since)
                .unwrap();
            req
        }

        fn build_resp(last_modified: &[u8]) -> ResponseHeader {
            let mut resp = ResponseHeader::build(200, None).unwrap();
            resp.insert_header("Last-Modified", last_modified).unwrap();
            resp
        }

        // same date
        let last_modified = b"Fri, 26 Mar 2010 00:05:00 GMT";
        let req = build_req(b"Fri, 26 Mar 2010 00:05:00 GMT");
        let resp = build_resp(last_modified);
        assert!(not_modified_filter(&req, &resp));

        // before
        let req = build_req(b"Fri, 26 Mar 2010 00:03:00 GMT");
        let resp = build_resp(last_modified);
        assert!(!not_modified_filter(&req, &resp));

        // after
        let req = build_req(b"Sun, 28 Mar 2010 01:07:00 GMT");
        let resp = build_resp(last_modified);
        assert!(not_modified_filter(&req, &resp));
    }

    #[test]
    fn test_weak_validate_etag() {
        let target_weak_etag = br#"W/"xyzzy""#;
        let target_etag = br#""xyzzy""#;
        assert!(weak_validate_etag(b"*", target_weak_etag));
        assert!(weak_validate_etag(b"*", target_etag));

        assert!(weak_validate_etag(target_etag, target_etag));
        assert!(weak_validate_etag(target_etag, target_weak_etag));
        assert!(weak_validate_etag(target_weak_etag, target_etag));
        assert!(weak_validate_etag(target_weak_etag, target_weak_etag));

        let mismatch_weak_etag = br#"W/"abc""#;
        let mismatch_etag = br#""abc""#;
        assert!(!weak_validate_etag(mismatch_etag, target_etag));
        assert!(!weak_validate_etag(mismatch_etag, target_weak_etag));
        assert!(!weak_validate_etag(mismatch_weak_etag, target_etag));
        assert!(!weak_validate_etag(mismatch_weak_etag, target_weak_etag));

        let multiple_etags = br#"a, "xyzzy","r2d2xxxx", "c3piozzzz",zzzfoo"#;
        assert!(weak_validate_etag(multiple_etags, target_etag));
        assert!(weak_validate_etag(multiple_etags, target_weak_etag));

        let multiple_mismatch_etags = br#"foobar", "r2d2xxxx", "c3piozzzz",zzzfoo"#;
        assert!(!weak_validate_etag(multiple_mismatch_etags, target_etag));
        assert!(!weak_validate_etag(
            multiple_mismatch_etags,
            target_weak_etag
        ));

        let multiple_mismatch_etags =
            br#"foobar", "r2d2xxxxyzzy", "c3piozzzz",zzzfoo, "xyzzy,xyzzy""#;
        assert!(!weak_validate_etag(multiple_mismatch_etags, target_etag));
        assert!(!weak_validate_etag(
            multiple_mismatch_etags,
            target_weak_etag
        ));

        let target_comma_etag = br#"",,,""#;
        let multiple_mismatch_etags = br#",", ",,,,", ,,,,,,,,",,",",,,,,,""#;
        assert!(!weak_validate_etag(
            multiple_mismatch_etags,
            target_comma_etag
        ));
        let multiple_etags = br#",", ",,,,", ,,,,,,,,",,,",",,,,,,""#;
        assert!(weak_validate_etag(multiple_etags, target_comma_etag));
    }

    #[test]
    fn test_weak_validate_etag_unquoted() {
        // legacy unquoted etag
        let target_unquoted = b"xyzzy";
        assert!(weak_validate_etag(b"*", target_unquoted));

        let strong_etag = br#""xyzzy""#;
        assert!(!weak_validate_etag(strong_etag, target_unquoted));
        assert!(!weak_validate_etag(target_unquoted, strong_etag));

        let multiple_etags = br#"a, "r2d2xxxx", "c3piozzzz",   xyzzy"#;
        assert!(weak_validate_etag(multiple_etags, target_unquoted));

        let multiple_mismatch_etags =
            br#"foobar", "r2d2xxxxyzzy", "c3piozzzz",zzzfoo, "xyzzy,xyzzy""#;
        assert!(!weak_validate_etag(
            multiple_mismatch_etags,
            target_unquoted
        ));

        // in certain edge cases where commas are used alongside quoted ETags,
        // the test can fail if target is unquoted (the last ETag is intended to be one ETag)
        let multiple_mismatch_etags =
            br#"foobar", "r2d2xxxxyzzy", "c3piozzzz",zzzfoo, "xyzzy,xyzzy,xy""#;
        assert!(weak_validate_etag(multiple_mismatch_etags, target_unquoted));
    }
}


================================================
FILE: pingora-core/src/protocols/http/custom/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::time::Duration;

use async_trait::async_trait;
use bytes::Bytes;
use futures::Stream;
use http::HeaderMap;
use pingora_error::Result;
use pingora_http::{RequestHeader, ResponseHeader};

use crate::protocols::{l4::socket::SocketAddr, Digest, UniqueIDType};

use super::{BodyWrite, CustomMessageWrite};

#[doc(hidden)]
#[async_trait]
pub trait Session: Send + Sync + Unpin + 'static {
    async fn write_request_header(&mut self, req: Box<RequestHeader>, end: bool) -> Result<()>;

    async fn write_request_body(&mut self, data: Bytes, end: bool) -> Result<()>;

    async fn finish_request_body(&mut self) -> Result<()>;

    fn set_read_timeout(&mut self, timeout: Option<Duration>);

    fn set_write_timeout(&mut self, timeout: Option<Duration>);

    async fn read_response_header(&mut self) -> Result<()>;

    async fn read_response_body(&mut self) -> Result<Option<Bytes>>;

    fn response_finished(&self) -> bool;

    async fn shutdown(&mut self, code: u32, ctx: &str);

    fn response_header(&self) -> Option<&ResponseHeader>;

    fn was_upgraded(&self) -> bool;

    fn digest(&self) -> Option<&Digest>;

    fn digest_mut(&mut self) -> Option<&mut Digest>;

    fn server_addr(&self) -> Option<&SocketAddr>;

    fn client_addr(&self) -> Option<&SocketAddr>;

    async fn read_trailers(&mut self) -> Result<Option<HeaderMap>>;

    fn fd(&self) -> UniqueIDType;

    async fn check_response_end_or_error(&mut self, headers: bool) -> Result<bool>;

    fn take_request_body_writer(&mut self) -> Option<Box<dyn BodyWrite>>;

    async fn finish_custom(&mut self) -> Result<()>;

    fn take_custom_message_reader(
        &mut self,
    ) -> Option<Box<dyn Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>>;

    async fn drain_custom_messages(&mut self) -> Result<()>;

    fn take_custom_message_writer(&mut self) -> Option<Box<dyn CustomMessageWrite>>;
}

#[doc(hidden)]
#[async_trait]
impl Session for () {
    async fn write_request_header(&mut self, _req: Box<RequestHeader>, _end: bool) -> Result<()> {
        unreachable!("client session: write_request_header")
    }

    async fn write_request_body(&mut self, _data: Bytes, _end: bool) -> Result<()> {
        unreachable!("client session: write_request_body")
    }

    async fn finish_request_body(&mut self) -> Result<()> {
        unreachable!("client session: finish_request_body")
    }

    fn set_read_timeout(&mut self, _timeout: Option<Duration>) {
        unreachable!("client session: set_read_timeout")
    }

    fn set_write_timeout(&mut self, _timeout: Option<Duration>) {
        unreachable!("client session: set_write_timeout")
    }

    async fn read_response_header(&mut self) -> Result<()> {
        unreachable!("client session: read_response_header")
    }

    async fn read_response_body(&mut self) -> Result<Option<Bytes>> {
        unreachable!("client session: read_response_body")
    }

    fn response_finished(&self) -> bool {
        unreachable!("client session: response_finished")
    }

    async fn shutdown(&mut self, _code: u32, _ctx: &str) {
        unreachable!("client session: shutdown")
    }

    fn response_header(&self) -> Option<&ResponseHeader> {
        unreachable!("client session: response_header")
    }

    fn was_upgraded(&self) -> bool {
        unreachable!("client session: was upgraded")
    }

    fn digest(&self) -> Option<&Digest> {
        unreachable!("client session: digest")
    }

    fn digest_mut(&mut self) -> Option<&mut Digest> {
        unreachable!("client session: digest_mut")
    }

    fn server_addr(&self) -> Option<&SocketAddr> {
        unreachable!("client session: server_addr")
    }

    fn client_addr(&self) -> Option<&SocketAddr> {
        unreachable!("client session: client_addr")
    }

    async fn finish_custom(&mut self) -> Result<()> {
        unreachable!("client session: finish_custom")
    }

    async fn read_trailers(&mut self) -> Result<Option<HeaderMap>> {
        unreachable!("client session: read_trailers")
    }

    fn fd(&self) -> UniqueIDType {
        unreachable!("client session: fd")
    }

    async fn check_response_end_or_error(&mut self, _headers: bool) -> Result<bool> {
        unreachable!("client session: check_response_end_or_error")
    }

    fn take_custom_message_reader(
        &mut self,
    ) -> Option<Box<dyn Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>> {
        unreachable!("client session: get_custom_message_reader")
    }

    async fn drain_custom_messages(&mut self) -> Result<()> {
        unreachable!("client session: drain_custom_messages")
    }

    fn take_custom_message_writer(&mut self) -> Option<Box<dyn CustomMessageWrite>> {
        unreachable!("client session: get_custom_message_writer")
    }

    fn take_request_body_writer(&mut self) -> Option<Box<dyn BodyWrite>> {
        unreachable!("client session: take_request_body_writer")
    }
}


================================================
FILE: pingora-core/src/protocols/http/custom/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::time::Duration;

use async_trait::async_trait;
use bytes::Bytes;
use futures::Stream;
use log::debug;
use pingora_error::Result;
use tokio_stream::StreamExt;

pub mod client;
pub mod server;

pub const CUSTOM_MESSAGE_QUEUE_SIZE: usize = 128;

pub fn is_informational_except_101<T: PartialOrd<u32>>(code: T) -> bool {
    // excluding `101 Switching Protocols`, because it's not followed by any other
    // response and it's a final
    // The WebSocket Protocol https://datatracker.ietf.org/doc/html/rfc6455
    code > 99 && code < 200 && code != 101
}

#[async_trait]
pub trait CustomMessageWrite: Send + Sync + Unpin + 'static {
    fn set_write_timeout(&mut self, timeout: Option<Duration>);
    async fn write_custom_message(&mut self, msg: Bytes) -> Result<()>;
    async fn finish_custom(&mut self) -> Result<()>;
}

#[doc(hidden)]
#[async_trait]
impl CustomMessageWrite for () {
    fn set_write_timeout(&mut self, _timeout: Option<Duration>) {}

    async fn write_custom_message(&mut self, msg: Bytes) -> Result<()> {
        debug!("write_custom_message: {:?}", msg);
        Ok(())
    }

    async fn finish_custom(&mut self) -> Result<()> {
        debug!("finish_custom");
        Ok(())
    }
}

#[async_trait]
pub trait BodyWrite: Send + Sync + Unpin + 'static {
    async fn write_all_buf(&mut self, data: &mut Bytes) -> Result<()>;
    async fn finish(&mut self) -> Result<()>;
    async fn cleanup(&mut self) -> Result<()>;
    fn upgrade_body_writer(&mut self);
}

pub async fn drain_custom_messages(
    reader: Option<Box<dyn Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>>,
) -> Result<()> {
    let Some(mut reader) = reader else {
        return Ok(());
    };

    while let Some(res) = reader.next().await {
        let msg = res?;
        debug!("consume_custom_messages: {msg:?}");
    }

    Ok(())
}

#[macro_export]
macro_rules! custom_session {
    ($base_obj:ident . $($method_tokens:tt)+) => {
        if let Some(custom_session) = $base_obj.as_custom_mut() {
            #[allow(clippy::semicolon_if_nothing_returned)]
            custom_session.$($method_tokens)+;
        }
    };
}


================================================
FILE: pingora-core/src/protocols/http/custom/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::time::Duration;

use async_trait::async_trait;
use bytes::Bytes;
use futures::Stream;
use http::HeaderMap;
use pingora_error::Result;
use pingora_http::{RequestHeader, ResponseHeader};

use crate::protocols::{http::HttpTask, l4::socket::SocketAddr, Digest};

use super::CustomMessageWrite;

#[doc(hidden)]
#[async_trait]
pub trait Session: Send + Sync + Unpin + 'static {
    fn req_header(&self) -> &RequestHeader;

    fn req_header_mut(&mut self) -> &mut RequestHeader;

    async fn read_body_bytes(&mut self) -> Result<Option<Bytes>>;

    async fn drain_request_body(&mut self) -> Result<()>;

    async fn write_response_header(&mut self, resp: Box<ResponseHeader>, end: bool) -> Result<()>;

    async fn write_response_header_ref(&mut self, resp: &ResponseHeader, end: bool) -> Result<()>;

    async fn write_body(&mut self, data: Bytes, end: bool) -> Result<()>;

    async fn write_trailers(&mut self, trailers: HeaderMap) -> Result<()>;

    async fn response_duplex_vec(&mut self, tasks: Vec<HttpTask>) -> Result<bool>;

    fn set_read_timeout(&mut self, timeout: Option<Duration>);

    fn get_read_timeout(&self) -> Option<Duration>;

    fn set_write_timeout(&mut self, timeout: Option<Duration>);

    fn get_write_timeout(&self) -> Option<Duration>;

    fn set_total_drain_timeout(&mut self, timeout: Option<Duration>);

    fn get_total_drain_timeout(&self) -> Option<Duration>;

    fn request_summary(&self) -> String;

    fn response_written(&self) -> Option<&ResponseHeader>;

    async fn shutdown(&mut self, code: u32, ctx: &str);

    fn is_body_done(&mut self) -> bool;

    async fn finish(&mut self) -> Result<()>;

    fn is_body_empty(&mut self) -> bool;

    async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result<Option<Bytes>>;

    fn body_bytes_sent(&self) -> usize;

    fn body_bytes_read(&self) -> usize;

    fn digest(&self) -> Option<&Digest>;

    fn digest_mut(&mut self) -> Option<&mut Digest>;

    fn client_addr(&self) -> Option<&SocketAddr>;

    fn server_addr(&self) -> Option<&SocketAddr>;

    fn pseudo_raw_h1_request_header(&self) -> Bytes;

    fn enable_retry_buffering(&mut self);

    fn retry_buffer_truncated(&self) -> bool;

    fn get_retry_buffer(&self) -> Option<Bytes>;

    async fn finish_custom(&mut self) -> Result<()>;

    fn take_custom_message_reader(
        &mut self,
    ) -> Option<Box<dyn Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>>;

    fn restore_custom_message_reader(
        &mut self,
        reader: Box<dyn Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>,
    ) -> Result<()>;

    fn take_custom_message_writer(&mut self) -> Option<Box<dyn CustomMessageWrite>>;

    fn restore_custom_message_writer(&mut self, writer: Box<dyn CustomMessageWrite>) -> Result<()>;

    /// Whether this request is for upgrade (e.g., websocket).
    ///
    /// Returns `true` if the request has HTTP/1.1 version and contains an Upgrade header.
    fn is_upgrade_req(&self) -> bool {
        false
    }

    /// Whether this session was fully upgraded (completed Upgrade handshake).
    ///
    /// Returns `true` if the request was an upgrade request and a 101 response was sent.
    fn was_upgraded(&self) -> bool {
        false
    }
}

#[doc(hidden)]
#[async_trait]
impl Session for () {
    fn req_header(&self) -> &RequestHeader {
        unreachable!("server session: req_header")
    }

    fn req_header_mut(&mut self) -> &mut RequestHeader {
        unreachable!("server session: req_header_mut")
    }

    async fn read_body_bytes(&mut self) -> Result<Option<Bytes>> {
        unreachable!("server session: read_body_bytes")
    }

    async fn drain_request_body(&mut self) -> Result<()> {
        unreachable!("server session: drain_request_body")
    }

    async fn write_response_header(
        &mut self,
        _resp: Box<ResponseHeader>,
        _end: bool,
    ) -> Result<()> {
        unreachable!("server session: write_response_header")
    }

    async fn write_response_header_ref(
        &mut self,
        _resp: &ResponseHeader,
        _end: bool,
    ) -> Result<()> {
        unreachable!("server session: write_response_header_ref")
    }

    async fn write_body(&mut self, _data: Bytes, _end: bool) -> Result<()> {
        unreachable!("server session: write_body")
    }

    async fn write_trailers(&mut self, _trailers: HeaderMap) -> Result<()> {
        unreachable!("server session: write_trailers")
    }

    async fn response_duplex_vec(&mut self, _tasks: Vec<HttpTask>) -> Result<bool> {
        unreachable!("server session: response_duplex_vec")
    }

    fn set_read_timeout(&mut self, _timeout: Option<Duration>) {
        unreachable!("server session: set_read_timeout")
    }

    fn get_read_timeout(&self) -> Option<Duration> {
        unreachable!("server_session: get_read_timeout")
    }

    fn set_write_timeout(&mut self, _timeout: Option<Duration>) {
        unreachable!("server session: set_write_timeout")
    }

    fn get_write_timeout(&self) -> Option<Duration> {
        unreachable!("server_session: get_write_timeout")
    }

    fn set_total_drain_timeout(&mut self, _timeout: Option<Duration>) {
        unreachable!("server session: set_total_drain_timeout")
    }

    fn get_total_drain_timeout(&self) -> Option<Duration> {
        unreachable!("server_session: get_total_drain_timeout")
    }

    fn request_summary(&self) -> String {
        unreachable!("server session: request_summary")
    }

    fn response_written(&self) -> Option<&ResponseHeader> {
        unreachable!("server session: response_written")
    }

    async fn shutdown(&mut self, _code: u32, _ctx: &str) {
        unreachable!("server session: shutdown")
    }

    fn is_body_done(&mut self) -> bool {
        unreachable!("server session: is_body_done")
    }

    async fn finish(&mut self) -> Result<()> {
        unreachable!("server session: finish")
    }

    fn is_body_empty(&mut self) -> bool {
        unreachable!("server session: is_body_empty")
    }

    async fn read_body_or_idle(&mut self, _no_body_expected: bool) -> Result<Option<Bytes>> {
        unreachable!("server session: read_body_or_idle")
    }

    fn body_bytes_sent(&self) -> usize {
        unreachable!("server session: body_bytes_sent")
    }

    fn body_bytes_read(&self) -> usize {
        unreachable!("server session: body_bytes_read")
    }

    fn digest(&self) -> Option<&Digest> {
        unreachable!("server session: digest")
    }

    fn digest_mut(&mut self) -> Option<&mut Digest> {
        unreachable!("server session: digest_mut")
    }

    fn client_addr(&self) -> Option<&SocketAddr> {
        unreachable!("server session: client_addr")
    }

    fn server_addr(&self) -> Option<&SocketAddr> {
        unreachable!("server session: server_addr")
    }

    fn pseudo_raw_h1_request_header(&self) -> Bytes {
        unreachable!("server session: pseudo_raw_h1_request_header")
    }

    fn enable_retry_buffering(&mut self) {
        unreachable!("server session: enable_retry_bufferings")
    }

    fn retry_buffer_truncated(&self) -> bool {
        unreachable!("server session: retry_buffer_truncated")
    }

    fn get_retry_buffer(&self) -> Option<Bytes> {
        unreachable!("server session: get_retry_buffer")
    }

    async fn finish_custom(&mut self) -> Result<()> {
        unreachable!("server session: finish_custom")
    }

    fn take_custom_message_reader(
        &mut self,
    ) -> Option<Box<dyn Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>> {
        unreachable!("server session: get_custom_message_reader")
    }

    fn restore_custom_message_reader(
        &mut self,
        _reader: Box<dyn Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>,
    ) -> Result<()> {
        unreachable!("server session: get_custom_message_reader")
    }

    fn take_custom_message_writer(&mut self) -> Option<Box<dyn CustomMessageWrite>> {
        unreachable!("server session: get_custom_message_writer")
    }

    fn restore_custom_message_writer(
        &mut self,
        _writer: Box<dyn CustomMessageWrite>,
    ) -> Result<()> {
        unreachable!("server session: restore_custom_message_writer")
    }

    fn is_upgrade_req(&self) -> bool {
        unreachable!("server session: is_upgrade_req")
    }

    fn was_upgraded(&self) -> bool {
        unreachable!("server session: was_upgraded")
    }
}


================================================
FILE: pingora-core/src/protocols/http/date.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use chrono::DateTime;
use http::header::HeaderValue;
use std::cell::RefCell;
use std::time::{Duration, SystemTime};

fn to_date_string(epoch_sec: i64) -> String {
    let dt = DateTime::from_timestamp(epoch_sec, 0).unwrap();
    dt.format("%a, %d %b %Y %H:%M:%S GMT").to_string()
}

struct CacheableDate {
    h1_date: HeaderValue,
    epoch: Duration,
}

impl CacheableDate {
    pub fn new() -> Self {
        let d = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .unwrap();
        CacheableDate {
            h1_date: HeaderValue::from_str(&to_date_string(d.as_secs() as i64)).unwrap(),
            epoch: d,
        }
    }

    pub fn update(&mut self, d_now: Duration) {
        if d_now.as_secs() != self.epoch.as_secs() {
            self.epoch = d_now;
            self.h1_date = HeaderValue::from_str(&to_date_string(d_now.as_secs() as i64)).unwrap();
        }
    }

    pub fn get_date(&mut self) -> HeaderValue {
        let d = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .unwrap();
        self.update(d);
        self.h1_date.clone()
    }
}

thread_local! {
    static CACHED_DATE: RefCell<CacheableDate>
        = RefCell::new(CacheableDate::new());
}

pub fn get_cached_date() -> HeaderValue {
    CACHED_DATE.with(|cache_date| (*cache_date.borrow_mut()).get_date())
}

#[cfg(test)]
mod test {
    use super::*;

    fn now_date_header() -> HeaderValue {
        HeaderValue::from_str(&to_date_string(
            SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)
                .unwrap()
                .as_secs() as i64,
        ))
        .unwrap()
    }

    #[test]
    fn test_date_string() {
        let date_str = to_date_string(1);
        assert_eq!("Thu, 01 Jan 1970 00:00:01 GMT", date_str);
    }

    #[test]
    fn test_date_cached() {
        assert_eq!(get_cached_date(), now_date_header());
    }
}


================================================
FILE: pingora-core/src/protocols/http/error_resp.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Error response generating utilities.

use http::header;
use once_cell::sync::Lazy;
use pingora_http::ResponseHeader;

use super::SERVER_NAME;

/// Generate an error response with the given status code.
///
/// This error response has a zero `Content-Length` and `Cache-Control: private, no-store`.
pub fn gen_error_response(code: u16) -> ResponseHeader {
    let mut resp = ResponseHeader::build(code, Some(4)).unwrap();
    resp.insert_header(header::SERVER, &SERVER_NAME[..])
        .unwrap();
    resp.insert_header(header::DATE, "Sun, 06 Nov 1994 08:49:37 GMT")
        .unwrap(); // placeholder
    resp.insert_header(header::CONTENT_LENGTH, "0").unwrap();
    resp.insert_header(header::CACHE_CONTROL, "private, no-store")
        .unwrap();
    resp
}

/// Pre-generated 502 response
pub static HTTP_502_RESPONSE: Lazy<ResponseHeader> = Lazy::new(|| gen_error_response(502));
/// Pre-generated 400 response
pub static HTTP_400_RESPONSE: Lazy<ResponseHeader> = Lazy::new(|| gen_error_response(400));


================================================
FILE: pingora-core/src/protocols/http/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP/1.x and HTTP/2 implementation APIs

pub mod body_buffer;
pub mod bridge;
pub mod client;
pub mod compression;
pub mod conditional_filter;
pub mod custom;
pub mod date;
pub mod error_resp;
pub mod server;
pub mod subrequest;
pub mod v1;
pub mod v2;

pub use server::Session as ServerSession;

/// The Pingora server name string
pub const SERVER_NAME: &[u8; 7] = b"Pingora";

/// An enum to hold all possible HTTP response events.
#[derive(Debug)]
pub enum HttpTask {
    /// the response header and the boolean end of response flag
    Header(Box<pingora_http::ResponseHeader>, bool),
    /// A piece of request or response body and the end of request/response boolean flag.
    Body(Option<bytes::Bytes>, bool),
    /// Request or response body bytes that have been upgraded on H1.1, and EOF bool flag.
    UpgradedBody(Option<bytes::Bytes>, bool),
    /// HTTP response trailer
    Trailer(Option<Box<http::HeaderMap>>),
    /// Signal that the response is already finished
    Done,
    /// Signal that the reading of the response encountered errors.
    Failed(pingora_error::BError),
}

impl HttpTask {
    /// Whether this [`HttpTask`] means the end of the response.
    pub fn is_end(&self) -> bool {
        match self {
            HttpTask::Header(_, end) => *end,
            HttpTask::Body(_, end) => *end,
            HttpTask::UpgradedBody(_, end) => *end,
            HttpTask::Trailer(_) => true,
            HttpTask::Done => true,
            HttpTask::Failed(_) => true,
        }
    }

    /// The [`HttpTask`] type as string.
    pub fn type_str(&self) -> &'static str {
        match self {
            HttpTask::Header(..) => "Header",
            HttpTask::Body(..) => "Body",
            HttpTask::UpgradedBody(..) => "UpgradedBody",
            HttpTask::Trailer(_) => "Trailer",
            HttpTask::Done => "Done",
            HttpTask::Failed(_) => "Failed",
        }
    }
}


================================================
FILE: pingora-core/src/protocols/http/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP server session APIs

use super::custom::server::Session as SessionCustom;
use super::error_resp;
use super::subrequest::server::HttpSession as SessionSubrequest;
use super::v1::server::HttpSession as SessionV1;
use super::v2::server::HttpSession as SessionV2;
use super::HttpTask;
use crate::custom_session;
use crate::protocols::{Digest, SocketAddr, Stream};
use bytes::Bytes;
use http::HeaderValue;
use http::{header::AsHeaderName, HeaderMap};
use pingora_error::{Error, Result};
use pingora_http::{RequestHeader, ResponseHeader};
use std::time::Duration;

/// HTTP server session object for both HTTP/1.x and HTTP/2
pub enum Session {
    H1(SessionV1),
    H2(SessionV2),
    Subrequest(SessionSubrequest),
    Custom(Box<dyn SessionCustom>),
}

impl Session {
    /// Create a new [`Session`] from an established connection for HTTP/1.x
    pub fn new_http1(stream: Stream) -> Self {
        Self::H1(SessionV1::new(stream))
    }

    /// Create a new [`Session`] from an established HTTP/2 stream
    pub fn new_http2(session: SessionV2) -> Self {
        Self::H2(session)
    }

    /// Create a new [`Session`] from a subrequest session
    pub fn new_subrequest(session: SessionSubrequest) -> Self {
        Self::Subrequest(session)
    }

    /// Create a new [`Session`] from a custom session
    pub fn new_custom(session: Box<dyn SessionCustom>) -> Self {
        Self::Custom(session)
    }

    /// Whether the session is HTTP/2. If not it is HTTP/1.x
    pub fn is_http2(&self) -> bool {
        matches!(self, Self::H2(_))
    }

    /// Whether the session is for a subrequest.
    pub fn is_subrequest(&self) -> bool {
        matches!(self, Self::Subrequest(_))
    }

    /// Whether the session is Custom
    pub fn is_custom(&self) -> bool {
        matches!(self, Self::Custom(_))
    }

    /// Read the request header. This method is required to be called first before doing anything
    /// else with the session.
    /// - `Ok(true)`: successful
    /// - `Ok(false)`: client exit without sending any bytes. This is normal on reused connection.
    ///   In this case the user should give up this session.
    pub async fn read_request(&mut self) -> Result<bool> {
        match self {
            Self::H1(s) => {
                let read = s.read_request().await?;
                Ok(read.is_some())
            }
            // This call will always return `Ok(true)` for Http2 because the request is already read
            Self::H2(_) => Ok(true),
            Self::Subrequest(s) => {
                let read = s.read_request().await?;
                Ok(read.is_some())
            }
            Self::Custom(_) => Ok(true),
        }
    }

    /// Return the request header it just read.
    /// # Panic
    /// This function will panic if [`Self::read_request()`] is not called.
    pub fn req_header(&self) -> &RequestHeader {
        match self {
            Self::H1(s) => s.req_header(),
            Self::H2(s) => s.req_header(),
            Self::Subrequest(s) => s.req_header(),
            Self::Custom(s) => s.req_header(),
        }
    }

    /// Return a mutable reference to request header it just read.
    /// # Panic
    /// This function will panic if [`Self::read_request()`] is not called.
    pub fn req_header_mut(&mut self) -> &mut RequestHeader {
        match self {
            Self::H1(s) => s.req_header_mut(),
            Self::H2(s) => s.req_header_mut(),
            Self::Subrequest(s) => s.req_header_mut(),
            Self::Custom(s) => s.req_header_mut(),
        }
    }

    /// Return the header by name. None if the header doesn't exist.
    ///
    /// In case there are multiple headers under the same name, the first one will be returned. To
    /// get all the headers: use `self.req_header().headers.get_all()`.
    pub fn get_header<K: AsHeaderName>(&self, key: K) -> Option<&HeaderValue> {
        self.req_header().headers.get(key)
    }

    /// Get the header value in its raw format.
    /// If the header doesn't exist, return an empty slice.
    pub fn get_header_bytes<K: AsHeaderName>(&self, key: K) -> &[u8] {
        self.get_header(key).map_or(b"", |v| v.as_bytes())
    }

    /// Read the request body. Ok(None) if no (more) body to read
    pub async fn read_request_body(&mut self) -> Result<Option<Bytes>> {
        match self {
            Self::H1(s) => s.read_body_bytes().await,
            Self::H2(s) => s.read_body_bytes().await,
            Self::Subrequest(s) => s.read_body_bytes().await,
            Self::Custom(s) => s.read_body_bytes().await,
        }
    }

    /// Discard the request body by reading it until completion.
    ///
    /// This is useful for making streams reusable (in particular for HTTP/1.1) after returning an
    /// error before the whole body has been read.
    pub async fn drain_request_body(&mut self) -> Result<()> {
        match self {
            Self::H1(s) => s.drain_request_body().await,
            Self::H2(s) => s.drain_request_body().await,
            Self::Subrequest(s) => s.drain_request_body().await,
            Self::Custom(s) => s.drain_request_body().await,
        }
    }

    /// Write the response header to client
    /// Informational headers (status code 100-199, excluding 101) can be written multiple times the final
    /// response header (status code 200+ or 101) is written.
    pub async fn write_response_header(&mut self, resp: Box<ResponseHeader>) -> Result<()> {
        match self {
            Self::H1(s) => {
                s.write_response_header(resp).await?;
                Ok(())
            }
            Self::H2(s) => s.write_response_header(resp, false),
            Self::Subrequest(s) => {
                s.write_response_header(resp).await?;
                Ok(())
            }
            Self::Custom(s) => s.write_response_header(resp, false).await,
        }
    }

    /// Similar to `write_response_header()`, this fn will clone the `resp` internally
    pub async fn write_response_header_ref(&mut self, resp: &ResponseHeader) -> Result<()> {
        match self {
            Self::H1(s) => {
                s.write_response_header_ref(resp).await?;
                Ok(())
            }
            Self::H2(s) => s.write_response_header_ref(resp, false),
            Self::Subrequest(s) => {
                s.write_response_header_ref(resp).await?;
                Ok(())
            }
            Self::Custom(s) => s.write_response_header_ref(resp, false).await,
        }
    }

    /// Write the response body to client
    pub async fn write_response_body(&mut self, data: Bytes, end: bool) -> Result<()> {
        if data.is_empty() && !end {
            // writing 0 byte to a chunked encoding h1 would finish the stream
            // writing 0 bytes to h2 is noop
            // we don't want to actually write in either cases
            return Ok(());
        }
        match self {
            Self::H1(s) => {
                if !data.is_empty() {
                    s.write_body(&data).await?;
                }
                if end {
                    s.finish_body().await?;
                }
                Ok(())
            }
            Self::H2(s) => s.write_body(data, end).await,
            Self::Subrequest(s) => {
                s.write_body(data).await?;
                Ok(())
            }
            Self::Custom(s) => s.write_body(data, end).await,
        }
    }

    /// Write the response trailers to client
    pub async fn write_response_trailers(&mut self, trailers: HeaderMap) -> Result<()> {
        match self {
            Self::H1(_) => Ok(()), // TODO: support trailers for h1
            Self::H2(s) => s.write_trailers(trailers),
            Self::Subrequest(s) => s.write_trailers(Some(Box::new(trailers))).await,
            Self::Custom(s) => s.write_trailers(trailers).await,
        }
    }

    /// Finish the life of this request.
    /// For H1, if connection reuse is supported, a Some(Stream) will be returned, otherwise None.
    /// For H2, always return None because H2 stream is not reusable.
    /// For subrequests, there is no true underlying stream to return.
    pub async fn finish(self) -> Result<Option<Stream>> {
        match self {
            Self::H1(mut s) => {
                // need to flush body due to buffering
                s.finish_body().await?;
                s.reuse().await
            }
            Self::H2(mut s) => {
                s.finish()?;
                Ok(None)
            }
            Self::Subrequest(mut s) => {
                s.finish().await?;
                Ok(None)
            }
            Self::Custom(mut s) => {
                s.finish().await?;
                Ok(None)
            }
        }
    }

    /// Callback for cleanup logic on downstream specifically when we fail to proxy the session
    /// other than cleanup via finish().
    ///
    /// If caching the downstream failure may be independent of (and precede) an upstream error in
    /// which case this function may be called more than once.
    pub fn on_proxy_failure(&mut self, e: Box<Error>) {
        match self {
            Self::H1(_) | Self::H2(_) | Self::Custom(_) => {
                // all cleanup logic handled in finish(),
                // stream and resources dropped when session dropped
            }
            Self::Subrequest(ref mut s) => s.on_proxy_failure(e),
        }
    }

    pub async fn response_duplex_vec(&mut self, tasks: Vec<HttpTask>) -> Result<bool> {
        match self {
            Self::H1(s) => s.response_duplex_vec(tasks).await,
            Self::H2(s) => s.response_duplex_vec(tasks).await,
            Self::Subrequest(s) => s.response_duplex_vec(tasks).await,
            Self::Custom(s) => s.response_duplex_vec(tasks).await,
        }
    }

    /// Set connection reuse. `duration` defines how long the connection is kept open for the next
    /// request to reuse. Noop for h2 and subrequest
    pub fn set_keepalive(&mut self, duration: Option<u64>) {
        match self {
            Self::H1(s) => s.set_server_keepalive(duration),
            Self::H2(_) => {}
            Self::Subrequest(_) => {}
            Self::Custom(_) => {}
        }
    }

    /// Get the keepalive timeout. None if keepalive is disabled. Not applicable for h2 or
    /// subrequest
    pub fn get_keepalive(&self) -> Option<u64> {
        match self {
            Self::H1(s) => s.get_keepalive_timeout(),
            Self::H2(_) => None,
            Self::Subrequest(_) => None,
            Self::Custom(_) => None,
        }
    }

    /// Set the number of times the upstream connection connection for this
    /// session can be reused via keepalive. Noop for h2 and subrequest
    pub fn set_keepalive_reuses_remaining(&mut self, reuses: Option<u32>) {
        if let Self::H1(s) = self {
            s.set_keepalive_reuses_remaining(reuses);
        }
    }

    /// Get the number of times the upstream connection connection for this
    /// session can be reused via keepalive. Not applicable for h2 or
    /// subrequest
    pub fn get_keepalive_reuses_remaining(&self) -> Option<u32> {
        if let Self::H1(s) = self {
            s.get_keepalive_reuses_remaining()
        } else {
            None
        }
    }

    /// Sets the downstream read timeout. This will trigger if we're unable
    /// to read from the stream after `timeout`.
    ///
    /// This is a noop for h2.
    pub fn set_read_timeout(&mut self, timeout: Option<Duration>) {
        match self {
            Self::H1(s) => s.set_read_timeout(timeout),
            Self::H2(_) => {}
            Self::Subrequest(s) => s.set_read_timeout(timeout),
            Self::Custom(c) => c.set_read_timeout(timeout),
        }
    }

    /// Gets the downstream read timeout if set.
    pub fn get_read_timeout(&self) -> Option<Duration> {
        match self {
            Self::H1(s) => s.get_read_timeout(),
            Self::H2(_) => None,
            Self::Subrequest(s) => s.get_read_timeout(),
            Self::Custom(s) => s.get_read_timeout(),
        }
    }

    /// Sets the downstream write timeout. This will trigger if we're unable
    /// to write to the stream after `timeout`. If a `min_send_rate` is
    /// configured then the `min_send_rate` calculated timeout has higher priority.
    pub fn set_write_timeout(&mut self, timeout: Option<Duration>) {
        match self {
            Self::H1(s) => s.set_write_timeout(timeout),
            Self::H2(s) => s.set_write_timeout(timeout),
            Self::Subrequest(s) => s.set_write_timeout(timeout),
            Self::Custom(c) => c.set_write_timeout(timeout),
        }
    }

    /// Gets the downstream write timeout if set.
    pub fn get_write_timeout(&self) -> Option<Duration> {
        match self {
            Self::H1(s) => s.get_write_timeout(),
            Self::H2(s) => s.get_write_timeout(),
            Self::Subrequest(s) => s.get_write_timeout(),
            Self::Custom(s) => s.get_write_timeout(),
        }
    }

    /// Sets the total drain timeout, which will be applied while discarding the
    /// request body using `drain_request_body`.
    ///
    /// For HTTP/1.1, reusing a session requires ensuring that the request body
    /// is consumed. If the timeout is exceeded, the caller should give up on
    /// trying to reuse the session.
    pub fn set_total_drain_timeout(&mut self, timeout: Option<Duration>) {
        match self {
            Self::H1(s) => s.set_total_drain_timeout(timeout),
            Self::H2(s) => s.set_total_drain_timeout(timeout),
            Self::Subrequest(s) => s.set_total_drain_timeout(timeout),
            Self::Custom(c) => c.set_total_drain_timeout(timeout),
        }
    }

    /// Gets the total drain timeout if set.
    pub fn get_total_drain_timeout(&self) -> Option<Duration> {
        match self {
            Self::H1(s) => s.get_total_drain_timeout(),
            Self::H2(s) => s.get_total_drain_timeout(),
            Self::Subrequest(s) => s.get_total_drain_timeout(),
            Self::Custom(s) => s.get_total_drain_timeout(),
        }
    }

    /// Sets the minimum downstream send rate in bytes per second. This
    /// is used to calculate a write timeout in seconds based on the size
    /// of the buffer being written. If a `min_send_rate` is configured it
    /// has higher priority over a set `write_timeout`. The minimum send
    /// rate must be greater than zero.
    ///
    /// Calculated write timeout is guaranteed to be at least 1s if `min_send_rate`
    /// is greater than zero, a send rate of zero is equivalent to disabling.
    ///
    /// This is a noop for h2.
    pub fn set_min_send_rate(&mut self, rate: Option<usize>) {
        match self {
            Self::H1(s) => s.set_min_send_rate(rate),
            Self::H2(_) => {}
            Self::Subrequest(_) => {}
            Self::Custom(_) => {}
        }
    }

    /// Sets whether we ignore writing informational responses downstream.
    ///
    /// For HTTP/1.1 this is a noop if the response is Upgrade or Continue and
    /// Expect: 100-continue was set on the request.
    ///
    /// This is a noop for h2 because informational responses are always ignored.
    /// Subrequests will always proxy the info response and let the true downstream
    /// decide to ignore or not.
    pub fn set_ignore_info_resp(&mut self, ignore: bool) {
        match self {
            Self::H1(s) => s.set_ignore_info_resp(ignore),
            Self::H2(_) => {} // always ignored
            Self::Subrequest(_) => {}
            Self::Custom(_) => {} // always ignored
        }
    }

    /// Sets whether keepalive should be disabled if response is written prior to
    /// downstream body finishing.
    ///
    /// This is a noop for h2.
    pub fn set_close_on_response_before_downstream_finish(&mut self, close: bool) {
        match self {
            Self::H1(s) => s.set_close_on_response_before_downstream_finish(close),
            Self::H2(_) => {}         // always ignored
            Self::Subrequest(_) => {} // always ignored
            Self::Custom(_) => {}     // always ignored
        }
    }

    /// Return a digest of the request including the method, path and Host header
    // TODO: make this use a `Formatter`
    pub fn request_summary(&self) -> String {
        match self {
            Self::H1(s) => s.request_summary(),
            Self::H2(s) => s.request_summary(),
            Self::Subrequest(s) => s.request_summary(),
            Self::Custom(s) => s.request_summary(),
        }
    }

    /// Return the written response header. `None` if it is not written yet.
    /// Only the final (status code >= 200 or 101) response header will be returned
    pub fn response_written(&self) -> Option<&ResponseHeader> {
        match self {
            Self::H1(s) => s.response_written(),
            Self::H2(s) => s.response_written(),
            Self::Subrequest(s) => s.response_written(),
            Self::Custom(s) => s.response_written(),
        }
    }

    /// Give up the http session abruptly.
    /// For H1 this will close the underlying connection
    /// For H2 this will send RESET frame to end this stream without impacting the connection
    /// For subrequests, this will drop task senders and receivers.
    pub async fn shutdown(&mut self) {
        match self {
            Self::H1(s) => s.shutdown().await,
            Self::H2(s) => s.shutdown(),
            Self::Subrequest(s) => s.shutdown(),
            Self::Custom(s) => s.shutdown(0, "shutdown").await,
        }
    }

    pub fn to_h1_raw(&self) -> Bytes {
        match self {
            Self::H1(s) => s.get_headers_raw_bytes(),
            Self::H2(s) => s.pseudo_raw_h1_request_header(),
            Self::Subrequest(s) => s.get_headers_raw_bytes(),
            Self::Custom(c) => c.pseudo_raw_h1_request_header(),
        }
    }

    /// Whether the whole request body is sent
    pub fn is_body_done(&mut self) -> bool {
        match self {
            Self::H1(s) => s.is_body_done(),
            Self::H2(s) => s.is_body_done(),
            Self::Subrequest(s) => s.is_body_done(),
            Self::Custom(s) => s.is_body_done(),
        }
    }

    /// Notify the client that the entire body is sent
    /// for H1 chunked encoding, this will end the last empty chunk
    /// for H1 content-length, this has no effect.
    /// for H2, this will send an empty DATA frame with END_STREAM flag
    /// for subrequest, this will send a Done http task
    pub async fn finish_body(&mut self) -> Result<()> {
        match self {
            Self::H1(s) => s.finish_body().await.map(|_| ()),
            Self::H2(s) => s.finish(),
            Self::Subrequest(s) => s.finish().await.map(|_| ()),
            Self::Custom(s) => s.finish().await,
        }
    }

    pub fn generate_error(error: u16) -> ResponseHeader {
        match error {
            /* common error responses are pre-generated */
            502 => error_resp::HTTP_502_RESPONSE.clone(),
            400 => error_resp::HTTP_400_RESPONSE.clone(),
            _ => error_resp::gen_error_response(error),
        }
    }

    /// Send error response to client using a pre-generated error message.
    pub async fn respond_error(&mut self, error: u16) -> Result<()> {
        self.respond_error_with_body(error, Bytes::default()).await
    }

    /// Send error response to client using a pre-generated error message and custom body.
    pub async fn respond_error_with_body(&mut self, error: u16, body: Bytes) -> Result<()> {
        let mut resp = Self::generate_error(error);
        if !body.is_empty() {
            // error responses have a default content-length of zero
            resp.set_content_length(body.len())?
        }
        self.write_error_response(resp, body).await
    }

    /// Send an error response to a client with a response header and body.
    pub async fn write_error_response(&mut self, resp: ResponseHeader, body: Bytes) -> Result<()> {
        // TODO: we shouldn't be closing downstream connections on internally generated errors
        // and possibly other upstream connect() errors (connection refused, timeout, etc)
        //
        // This change is only here because we DO NOT re-use downstream connections
        // today on these errors and we should signal to the client that pingora is dropping it
        // rather than a misleading the client with 'keep-alive'
        self.set_keepalive(None);

        // If a response was already written and it's not informational 1xx, return.
        // The only exception is an informational 101 Switching Protocols, which is treated
        // as final response https://www.rfc-editor.org/rfc/rfc9110#section-15.2.2.
        if let Some(resp_written) = self.response_written().as_ref() {
            if !resp_written.status.is_informational() || resp_written.status == 101 {
                return Ok(());
            }
        }

        self.write_response_header(Box::new(resp)).await?;

        if !body.is_empty() {
            self.write_response_body(body, true).await?;
        } else {
            self.finish_body().await?;
        }

        custom_session!(self.finish_custom().await?);

        Ok(())
    }

    /// Whether there is no request body
    pub fn is_body_empty(&mut self) -> bool {
        match self {
            Self::H1(s) => s.is_body_empty(),
            Self::H2(s) => s.is_body_empty(),
            Self::Subrequest(s) => s.is_body_empty(),
            Self::Custom(s) => s.is_body_empty(),
        }
    }

    pub fn retry_buffer_truncated(&self) -> bool {
        match self {
            Self::H1(s) => s.retry_buffer_truncated(),
            Self::H2(s) => s.retry_buffer_truncated(),
            Self::Subrequest(s) => s.retry_buffer_truncated(),
            Self::Custom(s) => s.retry_buffer_truncated(),
        }
    }

    pub fn enable_retry_buffering(&mut self) {
        match self {
            Self::H1(s) => s.enable_retry_buffering(),
            Self::H2(s) => s.enable_retry_buffering(),
            Self::Subrequest(s) => s.enable_retry_buffering(),
            Self::Custom(s) => s.enable_retry_buffering(),
        }
    }

    pub fn get_retry_buffer(&self) -> Option<Bytes> {
        match self {
            Self::H1(s) => s.get_retry_buffer(),
            Self::H2(s) => s.get_retry_buffer(),
            Self::Subrequest(s) => s.get_retry_buffer(),
            Self::Custom(s) => s.get_retry_buffer(),
        }
    }

    /// Read body (same as `read_request_body()`) or pending forever until downstream
    /// terminates the session.
    pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result<Option<Bytes>> {
        match self {
            Self::H1(s) => s.read_body_or_idle(no_body_expected).await,
            Self::H2(s) => s.read_body_or_idle(no_body_expected).await,
            Self::Subrequest(s) => s.read_body_or_idle(no_body_expected).await,
            Self::Custom(s) => s.read_body_or_idle(no_body_expected).await,
        }
    }

    pub fn as_http1(&self) -> Option<&SessionV1> {
        match self {
            Self::H1(s) => Some(s),
            Self::H2(_) => None,
            Self::Subrequest(_) => None,
            Self::Custom(_) => None,
        }
    }

    pub fn as_http2(&self) -> Option<&SessionV2> {
        match self {
            Self::H1(_) => None,
            Self::H2(s) => Some(s),
            Self::Subrequest(_) => None,
            Self::Custom(_) => None,
        }
    }

    pub fn as_subrequest(&self) -> Option<&SessionSubrequest> {
        match self {
            Self::H1(_) => None,
            Self::H2(_) => None,
            Self::Subrequest(s) => Some(s),
            Self::Custom(_) => None,
        }
    }

    pub fn as_subrequest_mut(&mut self) -> Option<&mut SessionSubrequest> {
        match self {
            Self::H1(_) => None,
            Self::H2(_) => None,
            Self::Subrequest(s) => Some(s),
            Self::Custom(_) => None,
        }
    }

    pub fn as_custom(&self) -> Option<&dyn SessionCustom> {
        match self {
            Self::H1(_) => None,
            Self::H2(_) => None,
            Self::Subrequest(_) => None,
            Self::Custom(c) => Some(c.as_ref()),
        }
    }

    pub fn as_custom_mut(&mut self) -> Option<&mut Box<dyn SessionCustom>> {
        match self {
            Self::H1(_) => None,
            Self::H2(_) => None,
            Self::Subrequest(_) => None,
            Self::Custom(c) => Some(c),
        }
    }

    /// Write a 100 Continue response to the client.
    pub async fn write_continue_response(&mut self) -> Result<()> {
        match self {
            Self::H1(s) => s.write_continue_response().await,
            Self::H2(s) => s.write_response_header(
                Box::new(ResponseHeader::build(100, Some(0)).unwrap()),
                false,
            ),
            Self::Subrequest(s) => s.write_continue_response().await,
            // TODO(slava): is there any write_continue_response calls?
            Self::Custom(s) => {
                s.write_response_header(
                    Box::new(ResponseHeader::build(100, Some(0)).unwrap()),
                    false,
                )
                .await
            }
        }
    }

    /// Whether this request is for upgrade (e.g., websocket).
    pub fn is_upgrade_req(&self) -> bool {
        match self {
            Self::H1(s) => s.is_upgrade_req(),
            Self::H2(_) => false,
            Self::Subrequest(s) => s.is_upgrade_req(),
            Self::Custom(s) => s.is_upgrade_req(),
        }
    }

    /// Whether this session was fully upgraded (completed Upgrade handshake).
    pub fn was_upgraded(&self) -> bool {
        match self {
            Self::H1(s) => s.was_upgraded(),
            Self::H2(_) => false,
            Self::Subrequest(s) => s.was_upgraded(),
            Self::Custom(s) => s.was_upgraded(),
        }
    }

    /// Return how many response body bytes (application, not wire) already sent downstream
    pub fn body_bytes_sent(&self) -> usize {
        match self {
            Self::H1(s) => s.body_bytes_sent(),
            Self::H2(s) => s.body_bytes_sent(),
            Self::Subrequest(s) => s.body_bytes_sent(),
            Self::Custom(s) => s.body_bytes_sent(),
        }
    }

    /// Return how many request body bytes (application, not wire) already read from downstream
    pub fn body_bytes_read(&self) -> usize {
        match self {
            Self::H1(s) => s.body_bytes_read(),
            Self::H2(s) => s.body_bytes_read(),
            Self::Subrequest(s) => s.body_bytes_read(),
            Self::Custom(s) => s.body_bytes_read(),
        }
    }

    /// Return the [Digest] for the connection.
    pub fn digest(&self) -> Option<&Digest> {
        match self {
            Self::H1(s) => Some(s.digest()),
            Self::H2(s) => s.digest(),
            Self::Subrequest(s) => s.digest(),
            Self::Custom(s) => s.digest(),
        }
    }

    /// Return a mutable [Digest] reference for the connection.
    ///
    /// Will return `None` if multiple H2 streams are open.
    pub fn digest_mut(&mut self) -> Option<&mut Digest> {
        match self {
            Self::H1(s) => Some(s.digest_mut()),
            Self::H2(s) => s.digest_mut(),
            Self::Subrequest(s) => s.digest_mut(),
            Self::Custom(s) => s.digest_mut(),
        }
    }

    /// Return the client (peer) address of the connection.
    pub fn client_addr(&self) -> Option<&SocketAddr> {
        match self {
            Self::H1(s) => s.client_addr(),
            Self::H2(s) => s.client_addr(),
            Self::Subrequest(s) => s.client_addr(),
            Self::Custom(s) => s.client_addr(),
        }
    }

    /// Return the server (local) address of the connection.
    pub fn server_addr(&self) -> Option<&SocketAddr> {
        match self {
            Self::H1(s) => s.server_addr(),
            Self::H2(s) => s.server_addr(),
            Self::Subrequest(s) => s.server_addr(),
            Self::Custom(s) => s.server_addr(),
        }
    }

    /// Get the reference of the [Stream] that this HTTP/1 session is operating upon.
    /// None if the HTTP session is over H2, or a subrequest
    pub fn stream(&self) -> Option<&Stream> {
        match self {
            Self::H1(s) => Some(s.stream()),
            Self::H2(_) => None,
            Self::Subrequest(_) => None,
            Self::Custom(_) => None,
        }
    }
}


================================================
FILE: pingora-core/src/protocols/http/subrequest/body.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Subrequest body reader and writer.
//!
//! This implementation is very similar to v1 if not identical in many cases.
//! However it is generally much simpler because it does not have to handle
//! wire format bytes, simply basic checks such as content-length and when the
//! underlying channel (sender or receiver) is closed.

use bytes::Bytes;
use log::{debug, trace, warn};
use pingora_error::{
    Error,
    ErrorType::{self, *},
    OrErr, Result,
};
use std::fmt::Debug;
use tokio::sync::{mpsc, oneshot};

use crate::protocols::http::HttpTask;
use http::HeaderMap;

pub const PREMATURE_BODY_END: ErrorType = ErrorType::new("PrematureBodyEnd");

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ParseState {
    ToStart,
    Complete(usize),       // total size
    Partial(usize, usize), // size read, remaining size
    Done(usize),           // done but there is error, size read
    UntilClose(usize),     // read until connection closed, size read
}

type PS = ParseState;

pub struct BodyReader {
    pub body_state: ParseState,
    notify_wants_body: Option<oneshot::Sender<()>>,
}

impl BodyReader {
    pub fn new(notify_wants_body: Option<oneshot::Sender<()>>) -> Self {
        BodyReader {
            body_state: PS::ToStart,
            notify_wants_body,
        }
        // TODO: if wants body signal is None, init empty
    }

    pub fn need_init(&self) -> bool {
        matches!(self.body_state, PS::ToStart)
    }

    pub fn init_content_length(&mut self, cl: usize) {
        match cl {
            0 => self.body_state = PS::Complete(0),
            _ => {
                self.body_state = PS::Partial(0, cl);
            }
        }
    }

    pub fn init_close_delimited(&mut self) {
        self.body_state = PS::UntilClose(0);
    }

    /// Convert how we interpret the remainder of the body to read until close.
    /// This is used for responses without explicit framing.
    pub fn convert_to_close_delimited(&mut self) {
        if matches!(self.body_state, PS::UntilClose(_)) {
            // nothing to do, already in close-delimited mode
            return;
        }

        // reset body counter
        self.body_state = PS::UntilClose(0);
    }

    pub fn body_done(&self) -> bool {
        matches!(self.body_state, PS::Complete(_) | PS::Done(_))
    }

    pub fn body_empty(&self) -> bool {
        self.body_state == PS::Complete(0)
    }

    pub async fn read_body(&mut self, rx: &mut mpsc::Receiver<HttpTask>) -> Result<Option<Bytes>> {
        match self.body_state {
            PS::Complete(_) => Ok(None),
            PS::Done(_) => Ok(None),
            PS::Partial(_, _) => self.do_read_body(rx).await,
            PS::UntilClose(_) => self.do_read_body_until_closed(rx).await,
            PS::ToStart => panic!("need to init BodyReader first"),
        }
    }

    pub async fn do_read_body(
        &mut self,
        rx: &mut mpsc::Receiver<HttpTask>,
    ) -> Result<Option<Bytes>> {
        if let Some(notify) = self.notify_wants_body.take() {
            // fine if downstream isn't actively being read
            let _ = notify.send(());
        }
        let (bytes, end) = match rx.recv().await {
            Some(HttpTask::Body(bytes, end)) => (bytes, end),
            Some(task) => {
                // TODO: return an error into_down for Failed?
                return Error::e_explain(
                    InternalError,
                    format!("Unexpected HttpTask {task:?} while reading body (subrequest)"),
                );
            }
            None => (None, true), // downstream ended
        };

        match self.body_state {
            PS::Partial(read, to_read) => {
                let n = bytes.as_ref().map_or(0, |b| b.len());
                debug!(
                    "BodyReader body_state: {:?}, read data from IO: {n} (subrequest)",
                    self.body_state,
                );
                if bytes.is_none() {
                    self.body_state = PS::Done(read);
                    return Error::e_explain(ConnectionClosed, format!(
                        "Peer prematurely closed connection with {to_read} bytes of body remaining to read (subrequest)",
                    ));
                }
                if end && n < to_read {
                    // TODO: this doesn't flush the bytes we did receive to upstream
                    self.body_state = PS::Done(read + n);
                    return Error::e_explain(PREMATURE_BODY_END, format!(
                        "Peer prematurely ended body with {} bytes of body remaining to read (subrequest)",
                        to_read - n
                    ));
                }
                if n >= to_read {
                    if n > to_read {
                        warn!(
                            "Peer sent more data then expected: extra {}\
                               bytes, discarding them (subrequest)",
                            n - to_read
                        );
                    }
                    self.body_state = PS::Complete(read + to_read);
                    Ok(bytes.map(|b| b.slice(0..to_read)))
                } else {
                    self.body_state = PS::Partial(read + n, to_read - n);
                    Ok(bytes)
                }
            }
            _ => panic!("wrong body state: {:?} (subrequest)", self.body_state),
        }
    }

    pub async fn do_read_body_until_closed(
        &mut self,
        rx: &mut mpsc::Receiver<HttpTask>,
    ) -> Result<Option<Bytes>> {
        if let Some(notify) = self.notify_wants_body.take() {
            // fine if downstream isn't active, receiver will indicate this
            let _ = notify.send(());
        }

        let (bytes, end) = match rx.recv().await {
            Some(HttpTask::Body(bytes, end)) => (bytes, end),
            Some(task) => {
                return Error::e_explain(
                    InternalError,
                    format!("Unexpected HttpTask {task:?} while reading body (subrequest)"),
                );
            }
            None => (None, true), // downstream ended
        };
        let n = bytes.as_ref().map_or(0, |b| b.len());
        match self.body_state {
            PS::UntilClose(read) => {
                if bytes.is_none() {
                    self.body_state = PS::Complete(read);
                    Ok(None)
                } else if end {
                    // explicit end also signifies completion
                    self.body_state = PS::Complete(read + n);
                    Ok(bytes)
                } else {
                    self.body_state = PS::UntilClose(read + n);
                    Ok(bytes)
                }
            }
            _ => panic!("wrong body state: {:?} (subrequest)", self.body_state),
        }
    }
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BodyMode {
    ToSelect,
    ContentLength(usize, usize), // total length to write, bytes already written
    UntilClose(usize),           //bytes written
    Complete(usize),             //bytes written
}

type BM = BodyMode;

pub struct BodyWriter {
    pub body_mode: BodyMode,
}

impl BodyWriter {
    pub fn new() -> Self {
        BodyWriter {
            body_mode: BM::ToSelect,
        }
    }

    pub fn init_close_delimited(&mut self) {
        self.body_mode = BM::UntilClose(0);
    }

    pub fn init_content_length(&mut self, cl: usize) {
        self.body_mode = BM::ContentLength(cl, 0);
    }

    pub async fn write_body(
        &mut self,
        sender: &mut mpsc::Sender<HttpTask>,
        bytes: Bytes,
    ) -> Result<Option<usize>> {
        trace!("Writing Body, size: {} (subrequest)", bytes.len());
        match self.body_mode {
            BM::Complete(_) => Ok(None),
            BM::ContentLength(_, _) => self.do_write_body(sender, bytes).await,
            BM::UntilClose(_) => self.do_write_until_close_body(sender, bytes).await,
            BM::ToSelect => panic!("wrong body phase: ToSelect (subrequest)"),
        }
    }

    pub fn finished(&self) -> bool {
        match self.body_mode {
            BM::Complete(_) => true,
            BM::ContentLength(total, written) => written >= total,
            _ => false,
        }
    }

    async fn do_write_body(
        &mut self,
        tx: &mut mpsc::Sender<HttpTask>,
        bytes: Bytes,
    ) -> Result<Option<usize>> {
        match self.body_mode {
            BM::ContentLength(total, written) => {
                if written >= total {
                    // already written full length
                    return Ok(None);
                }
                let mut to_write = total - written;
                if to_write < bytes.len() {
                    warn!("Trying to write data over content-length (subrequest): {total}");
                } else {
                    to_write = bytes.len();
                }
                let res = tx.send(HttpTask::Body(Some(bytes), false)).await;
                match res {
                    Ok(()) => {
                        self.body_mode = BM::ContentLength(total, written + to_write);
                        Ok(Some(to_write))
                    }
                    Err(e) => Error::e_because(WriteError, "while writing body (subrequest)", e),
                }
            }
            _ => panic!("wrong body mode: {:?} (subrequest)", self.body_mode),
        }
    }

    async fn do_write_until_close_body(
        &mut self,
        tx: &mut mpsc::Sender<HttpTask>,
        bytes: Bytes,
    ) -> Result<Option<usize>> {
        match self.body_mode {
            BM::UntilClose(written) => {
                let res = tx.send(HttpTask::Body(Some(bytes.clone()), false)).await;
                match res {
                    Ok(()) => {
                        self.body_mode = BM::UntilClose(written + bytes.len());
                        Ok(Some(bytes.len()))
                    }
                    Err(e) => Error::e_because(WriteError, "while writing body (subrequest)", e),
                }
            }
            _ => panic!("wrong body mode: {:?} (subrequest)", self.body_mode),
        }
    }

    pub async fn finish(&mut self, sender: &mut mpsc::Sender<HttpTask>) -> Result<Option<usize>> {
        match self.body_mode {
            BM::Complete(_) => Ok(None),
            BM::ContentLength(_, _) => self.do_finish_body(sender).await,
            BM::UntilClose(_) => self.do_finish_until_close_body(sender).await,
            BM::ToSelect => Ok(None),
        }
    }

    async fn do_finish_body(&mut self, tx: &mut mpsc::Sender<HttpTask>) -> Result<Option<usize>> {
        match self.body_mode {
            BM::ContentLength(total, written) => {
                self.body_mode = BM::Complete(written);
                if written < total {
                    return Error::e_explain(
                        PREMATURE_BODY_END,
                        format!("Content-length: {total} bytes written: {written} (subrequest)"),
                    );
                }
                tx.send(HttpTask::Done).await.or_err(
                    WriteError,
                    "while sending done task to downstream (subrequest)",
                )?;
                Ok(Some(written))
            }
            _ => panic!("wrong body mode: {:?} (subrequest)", self.body_mode),
        }
    }

    async fn do_finish_until_close_body(
        &mut self,
        tx: &mut mpsc::Sender<HttpTask>,
    ) -> Result<Option<usize>> {
        match self.body_mode {
            BM::UntilClose(written) => {
                self.body_mode = BM::Complete(written);
                tx.send(HttpTask::Done).await.or_err(
                    WriteError,
                    "while sending done task to downstream (subrequest)",
                )?;
                Ok(Some(written))
            }
            _ => panic!("wrong body mode: {:?} (subrequest)", self.body_mode),
        }
    }

    pub async fn write_trailers(
        &mut self,
        tx: &mut mpsc::Sender<HttpTask>,
        trailers: Option<Box<HeaderMap>>,
    ) -> Result<()> {
        // TODO more safeguards e.g. trailers after end of stream
        tx.send(HttpTask::Trailer(trailers)).await.or_err(
            WriteError,
            "while writing response trailers to downstream (subrequest)",
        )?;
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    const TASK_BUFFER_SIZE: usize = 4;

    #[tokio::test]
    async fn read_with_body_content_length() {
        init_log();
        let input = b"abc";
        let (tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        let mut body_reader = BodyReader::new(None);
        body_reader.init_content_length(3);

        tx.send(HttpTask::Body(Some(Bytes::from(&input[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input[..]);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
    }

    #[tokio::test]
    async fn read_with_body_content_length_2() {
        init_log();
        let input1 = b"a";
        let input2 = b"bc";
        let (tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        let mut body_reader = BodyReader::new(None);
        body_reader.init_content_length(3);

        tx.send(HttpTask::Body(Some(Bytes::from(&input1[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input1[..]);
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));

        tx.send(HttpTask::Body(Some(Bytes::from(&input2[..])), true))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input2[..]);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
    }

    #[tokio::test]
    async fn read_with_body_content_length_empty_task() {
        init_log();
        let input1 = b"a";
        let input2 = b""; // zero length body task
        let (tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        let mut body_reader = BodyReader::new(None);
        body_reader.init_content_length(3);

        tx.send(HttpTask::Body(Some(Bytes::from(&input1[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input1[..]);
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));

        // subrequest can allow empty body tasks
        tx.send(HttpTask::Body(Some(Bytes::from(&input2[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input2[..]);
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));

        // premature end of stream still errors
        tx.send(HttpTask::Body(Some(Bytes::from(&input2[..])), true))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap_err();
        assert_eq!(&PREMATURE_BODY_END, res.etype());
        assert_eq!(body_reader.body_state, ParseState::Done(1));
    }

    #[tokio::test]
    async fn read_with_body_content_length_less() {
        init_log();
        let input1 = b"a";
        let (tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        let mut body_reader = BodyReader::new(None);
        body_reader.init_content_length(3);

        tx.send(HttpTask::Body(Some(Bytes::from(&input1[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input1[..]);
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));

        drop(tx);
        let res = body_reader.read_body(&mut rx).await.unwrap_err();
        assert_eq!(&ConnectionClosed, res.etype());
        assert_eq!(body_reader.body_state, ParseState::Done(1));
    }

    #[tokio::test]
    async fn read_with_body_content_length_more() {
        init_log();
        let input1 = b"a";
        let input2 = b"bcd";
        let (tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        let mut body_reader = BodyReader::new(None);
        body_reader.init_content_length(3);

        tx.send(HttpTask::Body(Some(Bytes::from(&input1[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input1[..]);
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));

        tx.send(HttpTask::Body(Some(Bytes::from(&input2[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input2[0..2]);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
    }

    #[tokio::test]
    async fn read_with_body_until_close() {
        init_log();
        let input1 = b"a";
        let input2 = b""; // zero length body but not actually close
        let (tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);
        let mut body_reader = BodyReader::new(None);
        body_reader.init_close_delimited();

        tx.send(HttpTask::Body(Some(Bytes::from(&input1[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input1[..]);
        assert_eq!(body_reader.body_state, ParseState::UntilClose(1));

        tx.send(HttpTask::Body(Some(Bytes::from(&input2[..])), false))
            .await
            .unwrap();
        let res = body_reader.read_body(&mut rx).await.unwrap().unwrap();
        assert_eq!(res, &input2[..]);
        assert_eq!(body_reader.body_state, ParseState::UntilClose(1));

        // sending end closed
        drop(tx);
        let res = body_reader.read_body(&mut rx).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
    }

    #[tokio::test]
    async fn write_body_cl() {
        init_log();
        let output = b"a";
        let (mut tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);
        let mut body_writer = BodyWriter::new();
        body_writer.init_content_length(1);
        assert_eq!(body_writer.body_mode, BodyMode::ContentLength(1, 0));
        let res = body_writer
            .write_body(&mut tx, Bytes::from(&output[..]))
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::ContentLength(1, 1));
        // write again, over the limit
        let res = body_writer
            .write_body(&mut tx, Bytes::from(&output[..]))
            .await
            .unwrap();
        assert_eq!(res, None);
        assert_eq!(body_writer.body_mode, BodyMode::ContentLength(1, 1));
        let res = body_writer.finish(&mut tx).await.unwrap().unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::Complete(1));

        // only one body task written
        match rx.try_recv().unwrap() {
            HttpTask::Body(b, end) => {
                assert_eq!(b.unwrap(), &output[..]);
                assert!(!end);
            }
            task => panic!("unexpected task {task:?}"),
        }
        assert!(matches!(rx.try_recv().unwrap(), HttpTask::Done));
        drop(tx);

        assert_eq!(
            rx.try_recv().unwrap_err(),
            mpsc::error::TryRecvError::Disconnected
        );
    }

    #[tokio::test]
    async fn write_body_until_close() {
        init_log();
        let data = b"a";
        let (mut tx, mut rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);
        let mut body_writer = BodyWriter::new();
        body_writer.init_close_delimited();
        assert_eq!(body_writer.body_mode, BodyMode::UntilClose(0));
        let res = body_writer
            .write_body(&mut tx, Bytes::from(&data[..]))
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::UntilClose(1));
        match rx.try_recv().unwrap() {
            HttpTask::Body(b, end) => {
                assert_eq!(b.unwrap().as_ref(), data);
                assert!(!end);
            }
            task => panic!("unexpected task {task:?}"),
        }

        let res = body_writer
            .write_body(&mut tx, Bytes::from(&data[..]))
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::UntilClose(2));
        let res = body_writer.finish(&mut tx).await.unwrap().unwrap();
        assert_eq!(res, 2);
        assert_eq!(body_writer.body_mode, BodyMode::Complete(2));
        match rx.try_recv().unwrap() {
            HttpTask::Body(b, end) => {
                assert_eq!(b.unwrap().as_ref(), data);
                assert!(!end);
            }
            task => panic!("unexpected task {task:?}"),
        }
        assert!(matches!(rx.try_recv().unwrap(), HttpTask::Done));

        assert_eq!(rx.try_recv().unwrap_err(), mpsc::error::TryRecvError::Empty);
    }
}


================================================
FILE: pingora-core/src/protocols/http/subrequest/dummy.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::protocols::raw_connect::ProxyDigest;
use crate::protocols::{
    GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, SocketDigest, Ssl, TimingDigest,
    UniqueID, UniqueIDType,
};
use async_trait::async_trait;
use core::pin::Pin;
use core::task::{Context, Poll};
use std::io::Cursor;
use std::sync::Arc;
use tokio::io::{AsyncRead, AsyncWrite, Error, ReadBuf};

// An async IO stream that returns the request when being read from and dumps the data to the void
// when being write to
#[derive(Debug)]
pub(crate) struct DummyIO(Cursor<Vec<u8>>);

impl DummyIO {
    pub fn new(read_bytes: &[u8]) -> Self {
        DummyIO(Cursor::new(Vec::from(read_bytes)))
    }
}

impl AsyncRead for DummyIO {
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<Result<(), Error>> {
        if self.0.position() < self.0.get_ref().len() as u64 {
            Pin::new(&mut self.0).poll_read(cx, buf)
        } else {
            // all data is read, pending forever otherwise the stream is considered closed
            Poll::Pending
        }
    }
}

impl AsyncWrite for DummyIO {
    fn poll_write(
        self: Pin<&mut Self>,
        _cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<Result<usize, Error>> {
        Poll::Ready(Ok(buf.len()))
    }

    fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
        Poll::Ready(Ok(()))
    }
    fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
        Poll::Ready(Ok(()))
    }
}

impl UniqueID for DummyIO {
    fn id(&self) -> UniqueIDType {
        0 // placeholder
    }
}

impl Ssl for DummyIO {}

impl GetTimingDigest for DummyIO {
    fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
        vec![]
    }
}

impl GetProxyDigest for DummyIO {
    fn get_proxy_digest(&self) -> Option<Arc<ProxyDigest>> {
        None
    }
}

impl GetSocketDigest for DummyIO {
    fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
        None
    }
}

impl Peek for DummyIO {}

#[async_trait]
impl crate::protocols::Shutdown for DummyIO {
    async fn shutdown(&mut self) -> () {}
}

#[tokio::test]
async fn test_dummy_io() {
    use futures::FutureExt;
    use tokio::io::{AsyncReadExt, AsyncWriteExt};

    let mut dummy = DummyIO::new(&[1, 2]);
    let res = dummy.read_u8().await;
    assert_eq!(res.unwrap(), 1);
    let res = dummy.read_u8().await;
    assert_eq!(res.unwrap(), 2);
    let res = dummy.read_u8().now_or_never();
    assert!(res.is_none()); // pending forever
    let res = dummy.write_u8(0).await;
    assert!(res.is_ok());
}


================================================
FILE: pingora-core/src/protocols/http/subrequest/mod.rs
================================================
pub(crate) mod body;
pub(crate) mod dummy;
pub mod server;


================================================
FILE: pingora-core/src/protocols/http/subrequest/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # HTTP server session for subrequests
//!
//! This server session is _very_ similar to the implementation for v1, if not
//! identical in many cases. Though in theory subrequests are HTTP version
//! agnostic in reality this means that they must interpret any version-specific
//! idiosyncracies such as Connection: upgrade headers in H1 because they
//! "stand-in" for the actual main Session when running proxy logic. As much as
//! possible they should defer downstream-specific logic to the actual downstream
//! session and act more or less as a pipe.
//!
//! The session also instantiates a [`SubrequestHandle`] that contains necessary
//! communication channels with the subrequest, to make it possible to send
//! and receive data.
//!
//! Its write calls will send `HttpTask`s to the handle channels, instead of
//! flushing to an actual underlying stream.
//!
//! Connection reuse and keep-alive are not supported because there is no
//! actual underlying stream, only transient channels per request.

use bytes::Bytes;
use http::HeaderValue;
use http::{header, header::AsHeaderName, HeaderMap, Method};
use log::{debug, trace, warn};
use pingora_error::{Error, ErrorType::*, OkOrErr, Result};
use pingora_http::{RequestHeader, ResponseHeader};
use pingora_timeout::timeout;
use std::time::Duration;
use tokio::sync::{mpsc, oneshot};

use super::body::{BodyReader, BodyWriter};
use crate::protocols::http::{
    body_buffer::FixedBuffer,
    server::Session as GenericHttpSession,
    subrequest::dummy::DummyIO,
    v1::common::{header_value_content_length, is_chunked_encoding_from_headers, BODY_BUF_LIMIT},
    v1::server::HttpSession as SessionV1,
    HttpTask,
};
use crate::protocols::{Digest, SocketAddr};

/// The HTTP server session
pub struct HttpSession {
    // these are only options because we allow dropping them separately on shutdown
    tx: Option<mpsc::Sender<HttpTask>>,
    rx: Option<mpsc::Receiver<HttpTask>>,
    // Currently subrequest session is initialized via a dummy SessionV1 only
    // TODO: need to be able to indicate H2 / other HTTP versions here
    v1_inner: Box<SessionV1>,
    proxy_error: Option<oneshot::Sender<Box<Error>>>, // option to consume the sender
    read_req_header: bool,
    response_written: Option<ResponseHeader>,
    read_timeout: Option<Duration>,
    write_timeout: Option<Duration>,
    total_drain_timeout: Option<Duration>,
    body_bytes_sent: usize,
    body_bytes_read: usize,
    retry_buffer: Option<FixedBuffer>,
    body_reader: BodyReader,
    body_writer: BodyWriter,
    upgraded: bool,
    // TODO: likely doesn't need to be a separate bool when/if moving away from dummy SessionV1
    clear_request_body_headers: bool,
    digest: Option<Box<Digest>>,
}

/// A handle to the subrequest session itself to interact or read from it.
pub struct SubrequestHandle {
    /// Channel sender (for subrequest input)
    pub tx: mpsc::Sender<HttpTask>,
    /// Channel receiver (for subrequest output)
    pub rx: mpsc::Receiver<HttpTask>,
    /// Indicates when subrequest wants to start reading body input
    pub subreq_wants_body: oneshot::Receiver<()>,
    /// Any final or downstream error that was encountered while proxying
    pub subreq_proxy_error: oneshot::Receiver<Box<Error>>,
}

impl SubrequestHandle {
    /// Spawn a task to drain received HttpTasks.
    pub fn drain_tasks(mut self) -> tokio::task::JoinHandle<()> {
        tokio::spawn(async move {
            let _tx = self.tx; // keep handle to sender alive
            while self.rx.recv().await.is_some() {}
            trace!("subrequest dropped");
        })
    }
}

impl HttpSession {
    /// Create a new http server session for a subrequest.
    /// The created session needs to call [`Self::read_request()`] first before performing
    /// any other operations.
    pub fn new_from_session(session: &GenericHttpSession) -> (Self, SubrequestHandle) {
        let v1_inner = SessionV1::new(Box::new(DummyIO::new(&session.to_h1_raw())));
        let digest = session.digest().cloned();
        // allow buffering a small number of tasks, otherwise exert backpressure
        const CHANNEL_BUFFER_SIZE: usize = 4;
        let (downstream_tx, downstream_rx) = mpsc::channel(CHANNEL_BUFFER_SIZE);
        let (upstream_tx, upstream_rx) = mpsc::channel(CHANNEL_BUFFER_SIZE);
        let (wants_body_tx, wants_body_rx) = oneshot::channel();
        let (proxy_error_tx, proxy_error_rx) = oneshot::channel();
        (
            HttpSession {
                v1_inner: Box::new(v1_inner),
                tx: Some(upstream_tx),
                rx: Some(downstream_rx),
                proxy_error: Some(proxy_error_tx),
                body_reader: BodyReader::new(Some(wants_body_tx)),
                body_writer: BodyWriter::new(),
                read_req_header: false,
                response_written: None,
                read_timeout: None,
                write_timeout: None,
                total_drain_timeout: None,
                body_bytes_sent: 0,
                body_bytes_read: 0,
                retry_buffer: None,
                upgraded: false,
                clear_request_body_headers: false,
                digest: digest.map(Box::new),
            },
            SubrequestHandle {
                tx: downstream_tx,
                rx: upstream_rx,
                subreq_wants_body: wants_body_rx,
                subreq_proxy_error: proxy_error_rx,
            },
        )
    }

    /// Read the request header. Return `Ok(Some(n))` where the read and parsing are successful.
    pub async fn read_request(&mut self) -> Result<Option<usize>> {
        let res = self.v1_inner.read_request().await?;
        if res.is_none() {
            // this is when h1 client closes the connection without sending data,
            // which shouldn't be the case for a subrequest session just created
            return Error::e_explain(InternalError, "no session request header provided");
        }
        self.read_req_header = true;
        if self.clear_request_body_headers {
            // indicated that we wanted to clear these headers in the past, do so now
            self.clear_request_body_headers();
        }
        Ok(res)
    }

    /// Validate the request header read. This function must be called after the request header
    /// read.
    /// # Panics
    /// this function and most other functions will panic if called before [`Self::read_request()`]
    pub fn validate_request(&self) -> Result<()> {
        self.v1_inner.validate_request()
    }

    /// Return a reference of the `RequestHeader` this session read
    /// # Panics
    /// this function and most other functions will panic if called before [`Self::read_request()`]
    pub fn req_header(&self) -> &RequestHeader {
        self.v1_inner.req_header()
    }

    /// Return a mutable reference of the `RequestHeader` this session read
    /// # Panics
    /// this function and most other functions will panic if called before [`Self::read_request()`]
    pub fn req_header_mut(&mut self) -> &mut RequestHeader {
        self.v1_inner.req_header_mut()
    }

    /// Get the header value for the given header name
    /// If there are multiple headers under the same name, the first one will be returned
    /// Use `self.req_header().header.get_all(name)` to get all the headers under the same name
    pub fn get_header(&self, name: impl AsHeaderName) -> Option<&HeaderValue> {
        self.v1_inner.get_header(name)
    }

    /// Return the method of this request. None if the request is not read yet.
    pub(super) fn get_method(&self) -> Option<&http::Method> {
        self.v1_inner.get_method()
    }

    /// Return the path of the request (i.e., the `/hello?1` of `GET /hello?1 HTTP1.1`)
    /// An empty slice will be used if there is no path or the request is not read yet
    pub(super) fn get_path(&self) -> &[u8] {
        self.v1_inner.get_path()
    }

    /// Return the host header of the request. An empty slice will be used if there is no host header
    pub(super) fn get_host(&self) -> &[u8] {
        self.v1_inner.get_host()
    }

    /// Return a string `$METHOD $PATH, Host: $HOST`. Mostly for logging and debug purpose
    pub fn request_summary(&self) -> String {
        format!(
            "{} {}, Host: {} (subrequest)",
            self.get_method().map_or("-", |r| r.as_str()),
            String::from_utf8_lossy(self.get_path()),
            String::from_utf8_lossy(self.get_host())
        )
    }

    /// Is the request a upgrade request
    pub fn is_upgrade_req(&self) -> bool {
        self.v1_inner.is_upgrade_req()
    }

    /// Get the request header as raw bytes, `b""` when the header doesn't exist
    pub fn get_header_bytes(&self, name: impl AsHeaderName) -> &[u8] {
        self.v1_inner.get_header_bytes(name)
    }

    /// Read the request body. `Ok(None)` when there is no (more) body to read.
    pub async fn read_body_bytes(&mut self) -> Result<Option<Bytes>> {
        let read = self.read_body().await?;
        Ok(read.inspect(|b| {
            self.body_bytes_read += b.len();
            if let Some(buffer) = self.retry_buffer.as_mut() {
                buffer.write_to_buffer(b);
            }
        }))
    }

    async fn do_read_body(&mut self) -> Result<Option<Bytes>> {
        self.init_body_reader();
        self.body_reader
            .read_body(self.rx.as_mut().expect("rx valid before shutdown"))
            .await
    }

    /// Read the body bytes with timeout.
    async fn read_body(&mut self) -> Result<Option<Bytes>> {
        match self.read_timeout {
            Some(t) => match timeout(t, self.do_read_body()).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(
                    ReadTimedout,
                    format!("reading body, timeout: {t:?} (subrequest)"),
                ),
            },
            None => self.do_read_body().await,
        }
    }

    async fn do_drain_request_body(&mut self) -> Result<()> {
        loop {
            match self.read_body_bytes().await {
                Ok(Some(_)) => { /* continue to drain */ }
                Ok(None) => return Ok(()), // done
                Err(e) => return Err(e),
            }
        }
    }

    /// Drain the request body. `Ok(())` when there is no (more) body to read.
    pub async fn drain_request_body(&mut self) -> Result<()> {
        if self.is_body_done() {
            return Ok(());
        }
        match self.total_drain_timeout {
            Some(t) => match timeout(t, self.do_drain_request_body()).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(
                    ReadTimedout,
                    format!("draining body, timeout: {t:?} (subrequest)"),
                ),
            },
            None => self.do_drain_request_body().await,
        }
    }

    /// Whether there is no (more) body to be read.
    pub fn is_body_done(&mut self) -> bool {
        self.init_body_reader();
        self.body_reader.body_done()
    }

    /// Whether the request has an empty body
    /// Because HTTP 1.1 clients have to send either `Content-Length` or `Transfer-Encoding` in order
    /// to signal the server that it will send the body, this function returns accurate results even
    /// only when the request header is just read.
    pub fn is_body_empty(&mut self) -> bool {
        self.init_body_reader();
        self.body_reader.body_empty()
    }

    /// Write the response header to the client.
    /// This function can be called more than once to send 1xx informational headers excluding 101.
    pub async fn write_response_header(&mut self, header: Box<ResponseHeader>) -> Result<()> {
        if let Some(resp) = self.response_written.as_ref() {
            if !resp.status.is_informational() || self.upgraded {
                warn!("Respond header is already sent, cannot send again (subrequest)");
                return Ok(());
            }
        }

        // XXX: don't add additional downstream headers, unlike h1, subreq is mostly treated as a pipe

        // Allow informational header (excluding 101) to pass through without affecting the state
        // of the request
        if header.status == 101 || !header.status.is_informational() {
            // reset request body to done for incomplete upgrade handshakes
            if let Some(upgrade_ok) = self.is_upgrade(&header) {
                if upgrade_ok {
                    debug!("ok upgrade handshake");
                    // For ws we use HTTP1_0 do_read_body_until_closed
                    //
                    // On ws close the initiator sends a close frame and
                    // then waits for a response from the peer, once it receives
                    // a response it closes the conn. After receiving a
                    // control frame indicating the connection should be closed,
                    // a peer discards any further data received.
                    // https://www.rfc-editor.org/rfc/rfc6455#section-1.4
                    self.upgraded = true;
                    // Now that the upgrade was successful, we need to change
                    // how we interpret the rest of the body as pass-through.
                    if self.body_reader.need_init() {
                        self.init_body_reader();
                    } else {
                        // already initialized
                        // immediately start reading the rest of the body as upgraded
                        // (in theory most upgraded requests shouldn't have any body)
                        //
                        // TODO: https://datatracker.ietf.org/doc/html/rfc9110#name-upgrade
                        // the most spec-compliant behavior is to switch interpretation
                        // after sending the former body. For now we immediately
                        // switch interpretation to match nginx behavior.
                        // TODO: this has no effect resetting the body counter of TE chunked
                        self.body_reader.convert_to_close_delimited();
                    }
                } else {
                    debug!("bad upgrade handshake!");
                    // continue to read body as-is, this is now just a regular request
                }
            }
            self.init_body_writer(&header);
        }

        // TODO propagate h2 end
        debug!("send response header (subrequest)");
        match self
            .tx
            .as_mut()
            .expect("tx valid before shutdown")
            .send(HttpTask::Header(header.clone(), false))
            .await
        {
            Ok(()) => {
                self.response_written = Some(*header);
                Ok(())
            }
            Err(e) => Error::e_because(WriteError, "writing response header", e),
        }
    }

    /// Return the response header if it is already sent.
    pub fn response_written(&self) -> Option<&ResponseHeader> {
        self.response_written.as_ref()
    }

    /// `Some(true)` if the this is a successful upgrade
    /// `Some(false)` if the request is an upgrade but the response refuses it
    /// `None` if the request is not an upgrade.
    pub fn is_upgrade(&self, header: &ResponseHeader) -> Option<bool> {
        self.v1_inner.is_upgrade(header)
    }

    /// Was this request successfully turned into an upgraded connection?
    ///
    /// Both the request had to have been an `Upgrade` request
    /// and the response had to have been a `101 Switching Protocols`.
    // XXX: this should only be valid if subrequest is standing in for
    // a v1 session.
    pub fn was_upgraded(&self) -> bool {
        self.upgraded
    }

    fn init_body_writer(&mut self, header: &ResponseHeader) {
        use http::StatusCode;
        /* the following responses don't have body 204, 304, and HEAD */
        if matches!(
            header.status,
            StatusCode::NO_CONTENT | StatusCode::NOT_MODIFIED
        ) || self.get_method() == Some(&Method::HEAD)
        {
            self.body_writer.init_content_length(0);
            return;
        }

        if header.status.is_informational() && header.status != StatusCode::SWITCHING_PROTOCOLS {
            // 1xx response, not enough to init body
            return;
        }

        if self.is_upgrade(header) == Some(true) {
            self.body_writer.init_close_delimited();
        } else if is_chunked_encoding_from_headers(&header.headers) {
            // transfer-encoding takes priority over content-length
            self.body_writer.init_close_delimited();
        } else {
            let content_length =
                header_value_content_length(header.headers.get(http::header::CONTENT_LENGTH));
            match content_length {
                Some(length) => {
                    self.body_writer.init_content_length(length);
                }
                None => {
                    /* TODO: 1. connection: keepalive cannot be used,
                    2. mark connection must be closed */
                    self.body_writer.init_close_delimited();
                }
            }
        }
    }

    /// Same as [`Self::write_response_header()`] but takes a reference.
    pub async fn write_response_header_ref(&mut self, resp: &ResponseHeader) -> Result<()> {
        self.write_response_header(Box::new(resp.clone())).await
    }

    async fn do_write_body(&mut self, buf: Bytes) -> Result<Option<usize>> {
        let written = self
            .body_writer
            .write_body(self.tx.as_mut().expect("tx valid before shutdown"), buf)
            .await;

        if let Ok(Some(num_bytes)) = written {
            self.body_bytes_sent += num_bytes;
        }

        written
    }

    /// Write response body to the client. Return `Ok(None)` when there shouldn't be more body
    /// to be written, e.g., writing more bytes than what the `Content-Length` header suggests
    pub async fn write_body(&mut self, buf: Bytes) -> Result<Option<usize>> {
        // TODO: check if the response header is written
        match self.write_timeout {
            Some(t) => match timeout(t, self.do_write_body(buf)).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(WriteTimedout, format!("writing body, timeout: {t:?}")),
            },
            None => self.do_write_body(buf).await,
        }
    }

    fn maybe_force_close_body_reader(&mut self) {
        if self.upgraded && !self.body_reader.body_done() {
            // response is done, reset the request body to close
            self.body_reader.init_content_length(0);
        }
    }

    /// Signal that there is no more body to write.
    /// This call will try to flush the buffer if there is any un-flushed data.
    /// For chunked encoding response, this call will also send the last chunk.
    /// For upgraded sessions, this call will also close the reading of the client body.
    pub async fn finish(&mut self) -> Result<Option<usize>> {
        let res = self
            .body_writer
            .finish(self.tx.as_mut().expect("tx valid before shutdown"))
            .await?;

        self.maybe_force_close_body_reader();
        Ok(res)
    }

    /// Signal to error listener held by SubrequestHandle that a proxy error was encountered,
    /// and pass along what that error was.
    ///
    /// This is helpful to signal what errors were encountered outside of the proxy state machine,
    /// e.g. during subrequest request filters.
    ///
    /// Note: in the case of multiple proxy failures e.g. when caching, only the first error will
    /// be propagated (i.e. downstream error first if it goes away before upstream).
    pub fn on_proxy_failure(&mut self, e: Box<Error>) {
        // fine if handle is gone
        if let Some(sender) = self.proxy_error.take() {
            let _ = sender.send(e);
        }
    }

    /// Return how many response body bytes (application, not wire) already sent downstream
    pub fn body_bytes_sent(&self) -> usize {
        self.body_bytes_sent
    }

    /// Return how many request body bytes (application, not wire) already read from downstream
    pub fn body_bytes_read(&self) -> usize {
        self.body_bytes_read
    }

    fn is_chunked_encoding(&self) -> bool {
        is_chunked_encoding_from_headers(&self.req_header().headers)
    }

    /// Clear body-related subrequest headers.
    ///
    /// This is ok to call before the request is read; the headers will then be cleared after
    /// reading the request header.
    pub fn clear_request_body_headers(&mut self) {
        self.clear_request_body_headers = true;
        if self.read_req_header {
            let req = self.v1_inner.req_header_mut();
            req.remove_header(&header::CONTENT_LENGTH);
            req.remove_header(&header::TRANSFER_ENCODING);
            req.remove_header(&header::CONTENT_TYPE);
            req.remove_header(&header::CONTENT_ENCODING);
        }
    }

    fn init_body_reader(&mut self) {
        if self.body_reader.need_init() {
            // reset retry buffer
            if let Some(buffer) = self.retry_buffer.as_mut() {
                buffer.clear();
            }

            if self.was_upgraded() {
                // if upgraded _post_ 101 (and body was not init yet)
                // treat as upgraded body (pass through until closed)
                self.body_reader.init_close_delimited();
            } else if self.is_chunked_encoding() {
                // if chunked encoding, content-length should be ignored
                // TE is not visible at subrequest HttpTask level
                // so this means read until request closure
                self.body_reader.init_close_delimited();
            } else {
                let cl = header_value_content_length(self.get_header(header::CONTENT_LENGTH));
                match cl {
                    Some(i) => {
                        self.body_reader.init_content_length(i);
                    }
                    None => {
                        // Per RFC 9112: "Request messages are never close-delimited because they are
                        // always explicitly framed by length or transfer coding, with the absence of
                        // both implying the request ends immediately after the header section."
                        // All HTTP/1.x requests without Content-Length or Transfer-Encoding have 0 body
                        self.body_reader.init_content_length(0);
                    }
                }
            }
        }
    }

    pub fn retry_buffer_truncated(&self) -> bool {
        self.retry_buffer
            .as_ref()
            .map_or_else(|| false, |r| r.is_truncated())
    }

    pub fn enable_retry_buffering(&mut self) {
        if self.retry_buffer.is_none() {
            self.retry_buffer = Some(FixedBuffer::new(BODY_BUF_LIMIT))
        }
    }

    pub fn get_retry_buffer(&self) -> Option<Bytes> {
        self.retry_buffer.as_ref().and_then(|b| {
            if b.is_truncated() {
                None
            } else {
                b.get_buffer()
            }
        })
    }

    /// This function will (async) block forever until the client closes the connection.
    pub async fn idle(&mut self) -> Result<HttpTask> {
        let rx = self.rx.as_mut().expect("rx valid before shutdown");
        let mut task = rx
            .recv()
            .await
            .or_err(ReadError, "during HTTP idle state")?;
        // just consume empty body or done messages, the downstream channel is not a real
        // connection and only used for this one request
        while matches!(&task, HttpTask::Done)
            || matches!(&task, HttpTask::Body(b, _) if b.as_ref().is_none_or(|b| b.is_empty()))
        {
            task = rx
                .recv()
                .await
                .or_err(ReadError, "during HTTP idle state")?;
        }
        Ok(task)
    }

    /// This function will return body bytes (same as [`Self::read_body_bytes()`]), but after
    /// the client body finishes (`Ok(None)` is returned), calling this function again will block
    /// forever, same as [`Self::idle()`].
    pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result<Option<Bytes>> {
        if no_body_expected || self.is_body_done() {
            let read_task = self.idle().await?;
            Error::e_explain(
                ConnectError,
                format!("Sent unexpected task {read_task:?} after end of body (subrequest)"),
            )
        } else {
            self.read_body_bytes().await
        }
    }

    /// Return the raw bytes of the request header.
    pub fn get_headers_raw_bytes(&self) -> Bytes {
        self.v1_inner.get_headers_raw_bytes()
    }

    /// Close the subrequest channels, indicating that no more data will be sent
    /// or received. This is expected to be called before dropping the `Session` itself.
    pub fn shutdown(&mut self) {
        drop(self.tx.take());
        drop(self.rx.take());
    }

    /// Sets the downstream read timeout. This will trigger if we're unable
    /// to read from the subrequest channels after `timeout`.
    pub fn set_read_timeout(&mut self, timeout: Option<Duration>) {
        self.read_timeout = timeout;
    }

    /// Get the downstream read timeout.
    pub fn get_read_timeout(&self) -> Option<Duration> {
        self.read_timeout
    }

    /// Sets the downstream write timeout. This will trigger if we're unable
    /// to write to the subrequest channel after `timeout`.
    pub fn set_write_timeout(&mut self, timeout: Option<Duration>) {
        self.write_timeout = timeout;
    }

    /// Get the downstream write timeout.
    pub fn get_write_timeout(&self) -> Option<Duration> {
        self.write_timeout
    }

    /// Sets the total drain timeout.
    /// Note that the downstream read timeout still applies between body byte reads.
    pub fn set_total_drain_timeout(&mut self, timeout: Option<Duration>) {
        self.total_drain_timeout = timeout;
    }

    /// Get the downstream total drain timeout.
    pub fn get_total_drain_timeout(&self) -> Option<Duration> {
        self.total_drain_timeout
    }

    /// Return the [Digest], this is originally from the main request.
    pub fn digest(&self) -> Option<&Digest> {
        self.digest.as_deref()
    }

    /// Return a mutable [Digest] reference.
    pub fn digest_mut(&mut self) -> Option<&mut Digest> {
        self.digest.as_deref_mut()
    }

    /// Return the client (peer) address of the main request.
    pub fn client_addr(&self) -> Option<&SocketAddr> {
        self.digest()
            .and_then(|d| d.socket_digest.as_ref())
            .map(|d| d.peer_addr())?
    }

    /// Return the server (local) address of the main request.
    pub fn server_addr(&self) -> Option<&SocketAddr> {
        self.digest()
            .and_then(|d| d.socket_digest.as_ref())
            .map(|d| d.local_addr())?
    }

    /// Write a `100 Continue` response to the client.
    pub async fn write_continue_response(&mut self) -> Result<()> {
        // only send if we haven't already
        if self.response_written.is_none() {
            // size hint Some(0) because default is 8
            return self
                .write_response_header(Box::new(ResponseHeader::build(100, Some(0)).unwrap()))
                .await;
        }
        Ok(())
    }

    async fn write_non_empty_body(&mut self, data: Option<Bytes>, upgraded: bool) -> Result<()> {
        if upgraded != self.upgraded {
            if upgraded {
                panic!("Unexpected UpgradedBody task received on un-upgraded downstream session (subrequest)");
            } else {
                panic!("Unexpected Body task received on upgraded downstream session (subrequest)");
            }
        }
        let Some(d) = data else {
            return Ok(());
        };
        if d.is_empty() {
            return Ok(());
        }
        self.write_body(d).await.map_err(|e| e.into_down())?;
        Ok(())
    }

    async fn response_duplex(&mut self, task: HttpTask) -> Result<bool> {
        let end_stream = match task {
            HttpTask::Header(header, end_stream) => {
                self.write_response_header(header)
                    .await
                    .map_err(|e| e.into_down())?;
                end_stream
            }
            HttpTask::Body(data, end_stream) => {
                self.write_non_empty_body(data, false).await?;
                end_stream
            }
            HttpTask::UpgradedBody(data, end_stream) => {
                self.write_non_empty_body(data, true).await?;
                end_stream
            }
            HttpTask::Trailer(trailers) => {
                self.write_trailers(trailers).await?;
                true
            }
            HttpTask::Done => true,
            HttpTask::Failed(e) => return Err(e),
        };
        if end_stream {
            // no-op if body wasn't initialized or is finished already
            self.finish().await.map_err(|e| e.into_down())?;
        }
        Ok(end_stream || self.body_writer.finished())
    }

    // TODO: use vectored write to avoid copying
    pub async fn response_duplex_vec(&mut self, mut tasks: Vec<HttpTask>) -> Result<bool> {
        // TODO: send httptask failed on each error?
        let n_tasks = tasks.len();
        if n_tasks == 1 {
            // fallback to single operation to avoid copy
            return self.response_duplex(tasks.pop().unwrap()).await;
        }
        let mut end_stream = false;
        for task in tasks.into_iter() {
            end_stream = match task {
                HttpTask::Header(header, end_stream) => {
                    self.write_response_header(header)
                        .await
                        .map_err(|e| e.into_down())?;
                    end_stream
                }
                HttpTask::Body(data, end_stream) => {
                    self.write_non_empty_body(data, false).await?;
                    end_stream
                }
                HttpTask::UpgradedBody(data, end_stream) => {
                    self.write_non_empty_body(data, true).await?;
                    end_stream
                }
                HttpTask::Done => {
                    // write done
                    // we'll send HttpTask::Done at the end of this loop in finish
                    true
                }
                HttpTask::Trailer(trailers) => {
                    self.write_trailers(trailers).await?;
                    true
                }
                HttpTask::Failed(e) => {
                    // write failed
                    // error should also be returned when sender drops
                    return Err(e);
                }
            } || end_stream; // safe guard in case `end` in tasks flips from true to false
        }
        if end_stream {
            // no-op if body wasn't initialized or is finished already
            self.finish().await.map_err(|e| e.into_down())?;
        }
        Ok(end_stream || self.body_writer.finished())
    }

    /// Write response trailers to the client, this also closes the stream.
    pub async fn write_trailers(&mut self, trailers: Option<Box<HeaderMap>>) -> Result<()> {
        self.body_writer
            .write_trailers(
                self.tx.as_mut().expect("tx valid before shutdown"),
                trailers,
            )
            .await
    }
}

#[cfg(test)]
mod tests_stream {
    use super::*;
    use crate::protocols::http::subrequest::body::{BodyMode, ParseState};
    use bytes::BufMut;
    use http::StatusCode;
    use rstest::rstest;

    use std::str;
    use tokio_test::io::Builder;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    async fn session_from_input(input: &[u8]) -> (HttpSession, SubrequestHandle) {
        let mock_io = Builder::new().read(input).build();
        let mut http_stream = GenericHttpSession::new_http1(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let (mut http_stream, handle) = HttpSession::new_from_session(&http_stream);
        http_stream.read_request().await.unwrap();
        (http_stream, handle)
    }

    async fn build_upgrade_req(upgrade: &str, conn: &str) -> (HttpSession, SubrequestHandle) {
        let input = format!("GET / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: {upgrade}\r\nConnection: {conn}\r\n\r\n");
        session_from_input(input.as_bytes()).await
    }

    async fn build_req() -> (HttpSession, SubrequestHandle) {
        let input = "GET / HTTP/1.1\r\nHost: pingora.org\r\n\r\n".to_string();
        session_from_input(input.as_bytes()).await
    }

    #[tokio::test]
    async fn read_basic() {
        init_log();
        let input = b"GET / HTTP/1.1\r\n\r\n";
        let (http_stream, _handle) = session_from_input(input).await;
        assert_eq!(0, http_stream.req_header().headers.len());
        assert_eq!(Method::GET, http_stream.req_header().method);
        assert_eq!(b"/", http_stream.req_header().uri.path().as_bytes());
    }

    #[tokio::test]
    async fn read_upgrade_req() {
        // http 1.0
        let input = b"GET / HTTP/1.0\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\n\r\n";
        let (http_stream, _handle) = session_from_input(input).await;
        assert!(!http_stream.is_upgrade_req());

        // different method
        let input = b"POST / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\n\r\n";
        let (http_stream, _handle) = session_from_input(input).await;
        assert!(http_stream.is_upgrade_req());

        // missing upgrade header
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nConnection: upgrade\r\n\r\n";
        let (http_stream, _handle) = session_from_input(input).await;
        assert!(!http_stream.is_upgrade_req());

        // no connection header
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: WebSocket\r\n\r\n";
        let (http_stream, _handle) = session_from_input(input).await;
        assert!(http_stream.is_upgrade_req());

        let (http_stream, _handle) = build_upgrade_req("websocket", "Upgrade").await;
        assert!(http_stream.is_upgrade_req());

        // mixed case
        let (http_stream, _handle) = build_upgrade_req("WebSocket", "Upgrade").await;
        assert!(http_stream.is_upgrade_req());
    }

    #[tokio::test]
    async fn read_upgrade_req_with_1xx_response() {
        let (mut http_stream, _handle) = build_upgrade_req("websocket", "upgrade").await;
        assert!(http_stream.is_upgrade_req());
        let mut response = ResponseHeader::build(StatusCode::CONTINUE, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // 100 won't affect body state
        assert!(http_stream.is_body_done());
    }

    #[tokio::test]
    async fn write() {
        let (mut http_stream, mut handle) = build_req().await;
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Foo", "Bar").unwrap();
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        match handle.rx.try_recv().unwrap() {
            HttpTask::Header(header, end) => {
                assert_eq!(header.status, StatusCode::OK);
                assert_eq!(header.headers["foo"], "Bar");
                assert!(!end);
            }
            t => panic!("unexpected task {t:?}"),
        }
    }

    #[tokio::test]
    async fn write_informational() {
        let (mut http_stream, mut handle) = build_req().await;
        let response_100 = ResponseHeader::build(StatusCode::CONTINUE, None).unwrap();
        http_stream
            .write_response_header_ref(&response_100)
            .await
            .unwrap();
        match handle.rx.try_recv().unwrap() {
            HttpTask::Header(header, end) => {
                assert_eq!(header.status, StatusCode::CONTINUE);
                assert!(!end);
            }
            t => panic!("unexpected task {t:?}"),
        }

        let response_200 = ResponseHeader::build(StatusCode::OK, None).unwrap();
        http_stream
            .write_response_header_ref(&response_200)
            .await
            .unwrap();
        match handle.rx.try_recv().unwrap() {
            HttpTask::Header(header, end) => {
                assert_eq!(header.status, StatusCode::OK);
                assert!(!end);
            }
            t => panic!("unexpected task {t:?}"),
        }
    }

    #[tokio::test]
    async fn write_101_switching_protocol() {
        let (mut http_stream, mut handle) = build_upgrade_req("WebSocket", "Upgrade").await;
        let mut response_101 =
            ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response_101.append_header("Foo", "Bar").unwrap();
        http_stream
            .write_response_header_ref(&response_101)
            .await
            .unwrap();

        match handle.rx.try_recv().unwrap() {
            HttpTask::Header(header, end) => {
                assert_eq!(header.status, StatusCode::SWITCHING_PROTOCOLS);
                assert!(!end);
            }
            t => panic!("unexpected task {t:?}"),
        }
        assert!(http_stream.upgraded);

        let wire_body = Bytes::from(&b"PAYLOAD"[..]);
        let n = http_stream
            .write_body(wire_body.clone())
            .await
            .unwrap()
            .unwrap();
        assert_eq!(wire_body.len(), n);
        // this write should be ignored
        let response_502 = ResponseHeader::build(StatusCode::BAD_GATEWAY, None).unwrap();
        http_stream
            .write_response_header_ref(&response_502)
            .await
            .unwrap();

        match handle.rx.try_recv().unwrap() {
            HttpTask::Body(body, _end) => {
                assert_eq!(body.unwrap().len(), n);
            }
            t => panic!("unexpected task {t:?}"),
        }
        assert_eq!(
            handle.rx.try_recv().unwrap_err(),
            mpsc::error::TryRecvError::Empty
        );
    }

    #[tokio::test]
    async fn write_body_cl() {
        let (mut http_stream, _handle) = build_req().await;
        let wire_body = Bytes::from(&b"a"[..]);
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Content-Length", "1").unwrap();
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(1, 0)
        );
        let n = http_stream
            .write_body(wire_body.clone())
            .await
            .unwrap()
            .unwrap();
        assert_eq!(wire_body.len(), n);
        let n = http_stream.finish().await.unwrap().unwrap();
        assert_eq!(wire_body.len(), n);
    }

    #[tokio::test]
    async fn write_body_until_close() {
        let (mut http_stream, _handle) = build_req().await;
        let new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(0));
        let wire_body = Bytes::from(&b"PAYLOAD"[..]);
        let n = http_stream
            .write_body(wire_body.clone())
            .await
            .unwrap()
            .unwrap();
        assert_eq!(wire_body.len(), n);
        let n = http_stream.finish().await.unwrap().unwrap();
        assert_eq!(wire_body.len(), n);
    }

    #[tokio::test]
    async fn read_with_illegal() {
        init_log();
        let input1 = b"GET /a?q=b c HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nContent-Length: 3\r\n\r\n";
        let input3 = b"abc";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = GenericHttpSession::new_http1(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let (mut http_stream, handle) = HttpSession::new_from_session(&http_stream);
        http_stream.read_request().await.unwrap();
        handle
            .tx
            .send(HttpTask::Body(Some(Bytes::from(&input3[..])), false))
            .await
            .unwrap();

        assert_eq!(http_stream.get_path(), &b"/a?q=b%20c"[..]);
        let res = http_stream.read_body().await.unwrap().unwrap();
        assert_eq!(res, &input3[..]);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));
    }

    #[tokio::test]
    async fn test_write_body_write_timeout() {
        let (mut http_stream, _handle) = build_req().await;
        http_stream.write_timeout = Some(Duration::from_millis(100));
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Content-Length", "10").unwrap();
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        let body_write_buf = Bytes::from(&b"abc"[..]);
        http_stream
            .write_body(body_write_buf.clone())
            .await
            .unwrap();
        http_stream
            .write_body(body_write_buf.clone())
            .await
            .unwrap();
        http_stream.write_body(body_write_buf).await.unwrap();
        // channel full
        let last_body = Bytes::from(&b"a"[..]);
        let res = http_stream.write_body(last_body).await;
        assert_eq!(res.unwrap_err().etype(), &WriteTimedout);
    }

    #[tokio::test]
    async fn test_write_continue_resp() {
        let (mut http_stream, mut handle) = build_req().await;
        http_stream.write_continue_response().await.unwrap();
        match handle.rx.try_recv().unwrap() {
            HttpTask::Header(header, end) => {
                assert_eq!(header.status, StatusCode::CONTINUE);
                assert!(!end);
            }
            t => panic!("unexpected task {t:?}"),
        }
    }

    async fn session_from_input_no_validate(input: &[u8]) -> (HttpSession, SubrequestHandle) {
        let mock_io = Builder::new().read(input).build();
        let mut http_stream = GenericHttpSession::new_http1(Box::new(mock_io));
        // Read the request in v1 inner session to set up headers properly
        http_stream.read_request().await.unwrap();
        let (http_stream, handle) = HttpSession::new_from_session(&http_stream);
        (http_stream, handle)
    }

    #[rstest]
    #[case::negative("-1")]
    #[case::not_a_number("abc")]
    #[case::float("1.5")]
    #[case::empty("")]
    #[case::spaces("  ")]
    #[case::mixed("123abc")]
    #[tokio::test]
    async fn validate_request_rejects_invalid_content_length(#[case] invalid_value: &str) {
        init_log();
        let input = format!(
            "POST / HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: {}\r\n\r\n",
            invalid_value
        );
        let mock_io = Builder::new().read(input.as_bytes()).build();
        let mut http_stream = GenericHttpSession::new_http1(Box::new(mock_io));
        // read_request calls validate_request internally on the v1 inner stream, so it should fail here
        let res = http_stream.read_request().await;
        assert!(res.is_err());
        assert_eq!(
            res.unwrap_err().etype(),
            &pingora_error::ErrorType::InvalidHTTPHeader
        );
    }

    #[rstest]
    #[case::valid_zero("0")]
    #[case::valid_small("123")]
    #[case::valid_large("999999")]
    #[tokio::test]
    async fn validate_request_accepts_valid_content_length(#[case] valid_value: &str) {
        init_log();
        let input = format!(
            "POST / HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: {}\r\n\r\n",
            valid_value
        );
        let (mut http_stream, _handle) = session_from_input_no_validate(input.as_bytes()).await;
        let res = http_stream.read_request().await;
        assert!(res.is_ok());
    }

    #[tokio::test]
    async fn validate_request_accepts_no_content_length() {
        init_log();
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\n\r\n";
        let (mut http_stream, _handle) = session_from_input_no_validate(input).await;
        let res = http_stream.read_request().await;
        assert!(res.is_ok());
    }

    const POST_CL_UPGRADE_REQ: &[u8] = b"POST / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\nContent-Length: 10\r\n\r\n";
    const POST_CHUNKED_UPGRADE_REQ: &[u8] = b"POST / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\nTransfer-Encoding: chunked\r\n\r\n";
    const POST_BODY_DATA: &[u8] = b"abcdefghij";

    async fn build_upgrade_req_with_body(header: &[u8]) -> (HttpSession, SubrequestHandle) {
        let mock_io = Builder::new().read(header).build();
        let mut http_stream = GenericHttpSession::new_http1(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let (mut http_stream, handle) = HttpSession::new_from_session(&http_stream);
        http_stream.read_request().await.unwrap();
        (http_stream, handle)
    }

    #[rstest]
    #[case::content_length(POST_CL_UPGRADE_REQ)]
    #[case::chunked(POST_CHUNKED_UPGRADE_REQ)]
    #[tokio::test]
    async fn read_upgrade_req_with_body(#[case] header: &[u8]) {
        init_log();
        let (mut http_stream, handle) = build_upgrade_req_with_body(header).await;
        assert!(http_stream.is_upgrade_req());
        // request has body
        assert!(!http_stream.is_body_done());

        // Send body via the handle
        handle
            .tx
            .send(HttpTask::Body(Some(Bytes::from(POST_BODY_DATA)), true))
            .await
            .unwrap();

        let mut buf = vec![];
        while let Some(b) = http_stream.read_body_bytes().await.unwrap() {
            buf.put_slice(&b);
        }
        assert_eq!(buf, POST_BODY_DATA);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(10));
        assert_eq!(http_stream.body_bytes_read(), 10);

        assert!(http_stream.is_body_done());

        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // body reader type switches
        assert!(!http_stream.is_body_done());

        // now send ws data
        let ws_data = b"data";
        handle
            .tx
            .send(HttpTask::Body(Some(Bytes::from(&ws_data[..])), false))
            .await
            .unwrap();

        let buf = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(buf, ws_data.as_slice());
        assert!(!http_stream.is_body_done());

        // EOF ends body
        drop(handle.tx);
        assert!(http_stream.read_body_bytes().await.unwrap().is_none());
        assert!(http_stream.is_body_done());
    }
}


================================================
FILE: pingora-core/src/protocols/http/v1/body.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use bytes::{Buf, BufMut, Bytes, BytesMut};
use log::{debug, trace, warn};
use pingora_error::{
    Error,
    ErrorType::{self, *},
    OrErr, Result,
};
use std::fmt::Debug;
use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};

use crate::protocols::l4::stream::AsyncWriteVec;
use crate::utils::BufRef;

// TODO: make this dynamically adjusted
const BODY_BUFFER_SIZE: usize = 1024 * 64;
// limit how much incomplete chunk-size and chunk-ext to buffer
const PARTIAL_CHUNK_HEAD_LIMIT: usize = 1024 * 8;
// Trailers: https://datatracker.ietf.org/doc/html/rfc9112#section-7.1.2
// TODO: proper trailer handling and parsing
// generally trailers are an uncommonly used HTTP/1.1 feature, this is a somewhat
// arbitrary cap on trailer size after the 0 chunk size (like header buf)
const TRAILER_SIZE_LIMIT: usize = 1024 * 64;

const LAST_CHUNK: &[u8; 5] = &[b'0', CR, LF, CR, LF];
const CR: u8 = b'\r';
const LF: u8 = b'\n';
const CRLF: &[u8; 2] = &[CR, LF];
// This is really the CRLF end of the last trailer (or 0 chunk), + the last CRLF.
const TRAILERS_END: &[u8; 4] = &[CR, LF, CR, LF];

pub const INVALID_CHUNK: ErrorType = ErrorType::new("InvalidChunk");
pub const INVALID_TRAILER_END: ErrorType = ErrorType::new("InvalidTrailerEnd");
pub const PREMATURE_BODY_END: ErrorType = ErrorType::new("PrematureBodyEnd");

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum ParseState {
    ToStart,
    // Complete: total size (contetn-length)
    Complete(usize),
    // Partial: size read, remaining size (content-length)
    Partial(usize, usize),
    // Chunked: Chunked encoding, prior to the final 0\r\n chunk.
    // size read, next to read in current buf start, read in current buf start, remaining chunked size to read from IO
    Chunked(usize, usize, usize, usize),
    // ChunkedFinal: Final section once the 0\r\n chunk is read.
    // size read, trailer sizes parsed so far, use existing buf end, trailers end read
    ChunkedFinal(usize, usize, usize, u8),
    // Done: done but there is error, size read
    Done(usize),
    // UntilClose: read until connection closed, size read
    UntilClose(usize),
}

type PS = ParseState;

impl ParseState {
    pub fn finish(&self, additional_bytes: usize) -> Self {
        match self {
            PS::Partial(read, to_read) => PS::Complete(read + to_read),
            PS::Chunked(read, _, _, _) => PS::Complete(read + additional_bytes),
            PS::ChunkedFinal(read, _, _, _) => PS::Complete(read + additional_bytes),
            PS::UntilClose(read) => PS::Complete(read + additional_bytes),
            _ => self.clone(), /* invalid transaction */
        }
    }

    pub fn done(&self, additional_bytes: usize) -> Self {
        match self {
            PS::Partial(read, _) => PS::Done(read + additional_bytes),
            PS::Chunked(read, _, _, _) => PS::Done(read + additional_bytes),
            PS::ChunkedFinal(read, _, _, _) => PS::Done(read + additional_bytes),
            PS::UntilClose(read) => PS::Done(read + additional_bytes),
            _ => self.clone(), /* invalid transaction */
        }
    }

    pub fn read_final_chunk(&self, remaining_buf_size: usize) -> Self {
        match self {
            PS::Chunked(read, _, _, _) => {
                // The BodyReader is currently expected to copy the remaining buf
                // into self.body_buf.
                //
                // the 2 == the CRLF from the last chunk-size, 0 + CRLF
                // because ChunkedFinal is looking for CRLF + CRLF to end
                // the whole message.
                // This extra 2 bytes technically ends up cutting into the max trailers size,
                // which we consider fine for now until full trailers support.
                PS::ChunkedFinal(*read, 0, remaining_buf_size, 2)
            }
            PS::ChunkedFinal(..) => panic!("already read final chunk"),
            _ => self.clone(), /* invalid transaction */
        }
    }

    pub fn partial_chunk(&self, bytes_read: usize, bytes_to_read: usize) -> Self {
        match self {
            PS::Chunked(read, _, _, _) => PS::Chunked(read + bytes_read, 0, 0, bytes_to_read),
            PS::ChunkedFinal(..) => panic!("chunked transactions not applicable after final chunk"),
            _ => self.clone(), /* invalid transaction */
        }
    }

    pub fn multi_chunk(&self, bytes_read: usize, buf_start_index: usize) -> Self {
        match self {
            PS::Chunked(read, _, buf_end, _) => {
                PS::Chunked(read + bytes_read, buf_start_index, *buf_end, 0)
            }
            PS::ChunkedFinal(..) => panic!("chunked transactions not applicable after final chunk"),
            _ => self.clone(), /* invalid transaction */
        }
    }

    pub fn partial_chunk_head(&self, head_end: usize, head_size: usize) -> Self {
        match self {
            /* inform reader to read more to form a legal chunk */
            PS::Chunked(read, _, _, _) => PS::Chunked(*read, 0, head_end, head_size),
            PS::ChunkedFinal(..) => panic!("chunked transactions not applicable after final chunk"),
            _ => self.clone(), /* invalid transaction */
        }
    }

    pub fn new_buf(&self, buf_end: usize) -> Self {
        match self {
            PS::Chunked(read, _, _, _) => PS::Chunked(*read, 0, buf_end, 0),
            PS::ChunkedFinal(..) => panic!("chunked transactions not applicable after final chunk"),
            _ => self.clone(), /* invalid transaction */
        }
    }
}

pub struct BodyReader {
    pub body_state: ParseState,
    pub body_buf: Option<BytesMut>,
    pub body_buf_size: usize,
    rewind_buf_len: usize,
    upstream: bool,
    body_buf_overread: Option<BytesMut>,
}

impl BodyReader {
    pub fn new(upstream: bool) -> Self {
        BodyReader {
            body_state: PS::ToStart,
            body_buf: None,
            body_buf_size: BODY_BUFFER_SIZE,
            rewind_buf_len: 0,
            upstream,
            body_buf_overread: None,
        }
    }

    pub fn need_init(&self) -> bool {
        matches!(self.body_state, PS::ToStart)
    }

    pub fn reinit(&mut self) {
        self.body_state = PS::ToStart;
    }

    fn prepare_buf(&mut self, buf_to_rewind: &[u8]) {
        let mut body_buf = BytesMut::with_capacity(self.body_buf_size);
        if !buf_to_rewind.is_empty() {
            self.rewind_buf_len = buf_to_rewind.len();
            // TODO: this is still 1 copy. Make it zero
            body_buf.put_slice(buf_to_rewind);
        }
        if self.body_buf_size > buf_to_rewind.len() {
            //body_buf.resize(self.body_buf_size, 0);
            unsafe {
                body_buf.set_len(self.body_buf_size);
            }
        }
        self.body_buf = Some(body_buf);
    }

    pub fn init_chunked(&mut self, buf_to_rewind: &[u8]) {
        self.body_state = PS::Chunked(0, 0, 0, 0);
        self.prepare_buf(buf_to_rewind);
    }

    pub fn init_content_length(&mut self, cl: usize, buf_to_rewind: &[u8]) {
        match cl {
            0 => {
                self.body_state = PS::Complete(0);
                // Store any extra bytes that were read as overread
                if !buf_to_rewind.is_empty() {
                    let mut overread = BytesMut::with_capacity(buf_to_rewind.len());
                    overread.put_slice(buf_to_rewind);
                    self.body_buf_overread = Some(overread);
                }
            }
            _ => {
                self.prepare_buf(buf_to_rewind);
                self.body_state = PS::Partial(0, cl);
            }
        }
    }

    pub fn init_close_delimited(&mut self, buf_to_rewind: &[u8]) {
        self.prepare_buf(buf_to_rewind);
        self.body_state = PS::UntilClose(0);
    }

    /// Convert how we interpret the remainder of the body to read until close.
    /// This is used for responses without explicit framing (e.g., HTTP/1.0 responses).
    ///
    /// Does nothing if already in close-delimited mode.
    pub fn convert_to_close_delimited(&mut self) {
        if matches!(self.body_state, PS::UntilClose(_)) {
            // nothing to do, already in close-delimited mode
            return;
        }

        if self.rewind_buf_len == 0 {
            // take any extra bytes and send them as-is,
            // reset body counter
            let extra = self.body_buf_overread.take();
            let buf = extra.as_deref().unwrap_or_default();
            self.prepare_buf(buf);
        } // if rewind_buf_len is not 0, body read has not yet been polled
        self.body_state = PS::UntilClose(0);
    }

    pub fn get_body(&self, buf_ref: &BufRef) -> &[u8] {
        // TODO: these get_*() could panic. handle them better
        buf_ref.get(self.body_buf.as_ref().unwrap())
    }

    #[allow(dead_code)]
    pub fn get_body_overread(&self) -> Option<&[u8]> {
        self.body_buf_overread.as_deref()
    }

    pub fn has_bytes_overread(&self) -> bool {
        self.get_body_overread().is_some_and(|b| !b.is_empty())
    }

    pub fn body_done(&self) -> bool {
        matches!(self.body_state, PS::Complete(_) | PS::Done(_))
    }

    pub fn body_empty(&self) -> bool {
        self.body_state == PS::Complete(0)
    }

    fn finish_body_buf(&mut self, end_of_body: usize, total_read: usize) {
        let body_buf_mut = self.body_buf.as_mut().expect("must have read body buf");
        // remove unused buffer
        body_buf_mut.truncate(total_read);
        let overread_bytes = body_buf_mut.split_off(end_of_body);
        self.body_buf_overread = (!overread_bytes.is_empty()).then_some(overread_bytes);
    }

    pub async fn read_body<S>(&mut self, stream: &mut S) -> Result<Option<BufRef>>
    where
        S: AsyncRead + Unpin + Send,
    {
        match self.body_state {
            PS::Complete(_) => Ok(None),
            PS::Done(_) => Ok(None),
            PS::Partial(_, _) => self.do_read_body(stream).await,
            PS::Chunked(..) => self.do_read_chunked_body(stream).await,
            PS::ChunkedFinal(..) => self.do_read_chunked_body_final(stream).await,
            PS::UntilClose(_) => self.do_read_body_until_closed(stream).await,
            PS::ToStart => panic!("need to init BodyReader first"),
        }
    }

    pub async fn do_read_body<S>(&mut self, stream: &mut S) -> Result<Option<BufRef>>
    where
        S: AsyncRead + Unpin + Send,
    {
        let mut body_buf = self.body_buf.as_deref_mut().unwrap();
        let mut n = self.rewind_buf_len;
        self.rewind_buf_len = 0; // we only need to read rewind data once
        if n == 0 {
            // downstream should not discard remaining data if peer sent more.
            if !self.upstream {
                if let PS::Partial(_, to_read) = self.body_state {
                    if to_read < body_buf.len() {
                        body_buf = &mut body_buf[..to_read];
                    }
                }
            }
            /* Need to actually read */
            n = stream
                .read(body_buf)
                .await
                .or_err(ReadError, "when reading body")?;
        }
        match self.body_state {
            PS::Partial(read, to_read) => {
                debug!(
                    "BodyReader body_state: {:?}, read data from IO: {n}",
                    self.body_state
                );
                if n == 0 {
                    self.body_state = PS::Done(read);
                    Error::e_explain(ConnectionClosed, format!(
                        "Peer prematurely closed connection with {} bytes of body remaining to read",
                        to_read
                    ))
                } else if n >= to_read {
                    if n > to_read {
                        warn!(
                            "Peer sent more data then expected: extra {}\
                               bytes, discarding them",
                            n - to_read
                        )
                    }
                    self.body_state = PS::Complete(read + to_read);
                    self.finish_body_buf(to_read, n);
                    Ok(Some(BufRef::new(0, to_read)))
                } else {
                    self.body_state = PS::Partial(read + n, to_read - n);
                    Ok(Some(BufRef::new(0, n)))
                }
            }
            _ => panic!("wrong body state: {:?}", self.body_state),
        }
    }

    pub async fn do_read_body_until_closed<S>(&mut self, stream: &mut S) -> Result<Option<BufRef>>
    where
        S: AsyncRead + Unpin + Send,
    {
        let body_buf = self.body_buf.as_deref_mut().unwrap();
        let mut n = self.rewind_buf_len;
        self.rewind_buf_len = 0; // we only need to read rewind data once
        if n == 0 {
            /* Need to actually read */
            n = stream
                .read(body_buf)
                .await
                .or_err(ReadError, "when reading body")?;
        }
        match self.body_state {
            PS::UntilClose(read) => {
                if n == 0 {
                    self.body_state = PS::Complete(read);
                    Ok(None)
                } else {
                    self.body_state = PS::UntilClose(read + n);
                    Ok(Some(BufRef::new(0, n)))
                }
            }
            _ => panic!("wrong body state: {:?}", self.body_state),
        }
    }

    pub async fn do_read_chunked_body<S>(&mut self, stream: &mut S) -> Result<Option<BufRef>>
    where
        S: AsyncRead + Unpin + Send,
    {
        match self.body_state {
            PS::Chunked(
                total_read,
                existing_buf_start,
                mut existing_buf_end,
                mut expecting_from_io,
            ) => {
                if existing_buf_start == 0 {
                    // read a new buf from IO
                    let body_buf = self.body_buf.as_deref_mut().unwrap();
                    if existing_buf_end == 0 {
                        existing_buf_end = self.rewind_buf_len;
                        self.rewind_buf_len = 0; // we only need to read rewind data once
                        if existing_buf_end == 0 {
                            existing_buf_end = stream
                                .read(body_buf)
                                .await
                                .or_err(ReadError, "when reading body")?;
                        }
                    } else {
                        /* existing_buf_end != 0 this is partial chunk head */
                        /* copy the #expecting_from_io bytes until index existing_buf_end
                         * to the front and read more to form a valid chunk head.
                         * existing_buf_end is the end of the partial head and
                         * expecting_from_io is the len of it */
                        body_buf
                            .copy_within(existing_buf_end - expecting_from_io..existing_buf_end, 0);
                        let new_bytes = stream
                            .read(&mut body_buf[expecting_from_io..])
                            .await
                            .or_err(ReadError, "when reading body")?;
                        if new_bytes == 0 {
                            self.body_state = self.body_state.done(0);
                            return Error::e_explain(
                                ConnectionClosed,
                                format!(
                                    "Connection prematurely closed without the termination chunk \
                                    (partial chunk head), read {total_read} bytes"
                                ),
                            );
                        }

                        /* more data is read, extend the buffer */
                        existing_buf_end = expecting_from_io + new_bytes;
                        expecting_from_io = 0;
                    }
                    self.body_state = self.body_state.new_buf(existing_buf_end);
                }
                if existing_buf_end == 0 {
                    self.body_state = self.body_state.done(0);
                    Error::e_explain(
                        ConnectionClosed,
                        format!(
                            "Connection prematurely closed without the termination chunk, \
                            read {total_read} bytes"
                        ),
                    )
                } else {
                    if expecting_from_io > 0 {
                        let body_buf = self.body_buf.as_ref().unwrap();
                        trace!(
                            "partial chunk payload, expecting_from_io: {}, \
                                existing_buf_end {}, buf: {:?}",
                            expecting_from_io,
                            existing_buf_end,
                            self.body_buf.as_ref().unwrap()[..existing_buf_end].escape_ascii()
                        );

                        // partial chunk payload, will read more
                        if expecting_from_io >= existing_buf_end + 2 {
                            // not enough (doesn't contain CRLF end)
                            self.body_state = self.body_state.partial_chunk(
                                existing_buf_end,
                                expecting_from_io - existing_buf_end,
                            );
                            return Ok(Some(BufRef::new(0, existing_buf_end)));
                        }
                        /* could be expecting DATA + CRLF or just CRLF */
                        let payload_size = expecting_from_io.saturating_sub(2);
                        /* expecting_from_io < existing_buf_end + 2 */
                        let need_lf_only = expecting_from_io == 1; // otherwise we need the whole CRLF
                        if expecting_from_io > existing_buf_end {
                            // potentially:
                            // | CR | LF |
                            //      |    |
                            // (existing_buf_end)
                            //           |
                            //           (expecting_from_io)
                            if payload_size < existing_buf_end {
                                Self::validate_crlf(
                                    &mut self.body_state,
                                    &body_buf[payload_size..existing_buf_end],
                                    need_lf_only,
                                    false,
                                )?;
                            }
                        } else {
                            // expecting_from_io <= existing_buf_end
                            // chunk CRLF end should end here
                            assert!(Self::validate_crlf(
                                &mut self.body_state,
                                &body_buf[payload_size..expecting_from_io],
                                need_lf_only,
                                false,
                            )?);
                        }
                        if expecting_from_io >= existing_buf_end {
                            self.body_state = self
                                .body_state
                                .partial_chunk(payload_size, expecting_from_io - existing_buf_end);

                            return Ok(Some(BufRef::new(0, payload_size)));
                        }

                        /* expecting_from_io < existing_buf_end */
                        self.body_state =
                            self.body_state.multi_chunk(payload_size, expecting_from_io);

                        return Ok(Some(BufRef::new(0, payload_size)));
                    }
                    let (buf_res, last_chunk_size_end) =
                        self.parse_chunked_buf(existing_buf_start, existing_buf_end)?;
                    if buf_res.is_some() {
                        if let Some(idx) = last_chunk_size_end {
                            // just read the last 0 + CRLF, but not final end CRLF
                            // copy the rest of the buffer to the start of the body_buf
                            // so we can parse the remaining bytes as trailers / end
                            let body_buf = self.body_buf.as_deref_mut().unwrap();
                            trace!(
                                "last chunk size end buf {:?}",
                                &body_buf[..existing_buf_end].escape_ascii(),
                            );
                            body_buf.copy_within(idx..existing_buf_end, 0);
                        }
                    }
                    Ok(buf_res)
                }
            }
            _ => panic!("wrong body state: {:?}", self.body_state),
        }
    }

    // Returns: BufRef of next body chunk,
    // terminating chunk-size index end if read completely (0 + CRLF).
    // Note input indices are absolute (to body_buf).
    fn parse_chunked_buf(
        &mut self,
        buf_index_start: usize,
        buf_index_end: usize,
    ) -> Result<(Option<BufRef>, Option<usize>)> {
        let buf = &self.body_buf.as_ref().unwrap()[buf_index_start..buf_index_end];
        let chunk_status = httparse::parse_chunk_size(buf);
        match chunk_status {
            Ok(status) => {
                match status {
                    httparse::Status::Complete((payload_index, chunk_size)) => {
                        // TODO: Check chunk_size overflow
                        trace!(
                            "Got size {chunk_size}, payload_index: {payload_index}, chunk: {:?}",
                            String::from_utf8_lossy(buf).escape_default(),
                        );
                        let chunk_size = chunk_size as usize;
                        // https://github.com/seanmonstar/httparse/issues/149
                        // httparse does not treat zero-size chunk differently, it does not check
                        // that terminating chunk is 0 + double CRLF
                        if chunk_size == 0 {
                            /* terminating chunk, also need to handle trailer. */
                            let chunk_end_index = payload_index + 2;
                            return if chunk_end_index <= buf.len()
                                && buf[payload_index..chunk_end_index] == CRLF[..]
                            {
                                // full terminating CRLF MAY exist in current buf
                                // Skip ChunkedFinal state and go directly to Complete
                                // as optimization.
                                self.body_state = self.body_state.finish(0);
                                self.finish_body_buf(
                                    buf_index_start + chunk_end_index,
                                    buf_index_end,
                                );
                                Ok((None, Some(buf_index_start + payload_index)))
                            } else {
                                // Indicate start of parsing final chunked trailers,
                                // with remaining buf to read
                                self.body_state = self.body_state.read_final_chunk(
                                    buf_index_end - (buf_index_start + payload_index),
                                );

                                Ok((
                                    Some(BufRef::new(0, 0)),
                                    Some(buf_index_start + payload_index),
                                ))
                            };
                        }
                        // chunk-size CRLF [payload_index] byte*[chunk_size] CRLF
                        let data_end_index = payload_index + chunk_size;
                        let chunk_end_index = data_end_index + 2;
                        if chunk_end_index >= buf.len() {
                            // no multi chunk in this buf
                            let actual_size = if data_end_index > buf.len() {
                                buf.len() - payload_index
                            } else {
                                chunk_size
                            };

                            let crlf_start = chunk_end_index.saturating_sub(2);
                            if crlf_start < buf.len() {
                                Self::validate_crlf(
                                    &mut self.body_state,
                                    &buf[crlf_start..],
                                    false,
                                    false,
                                )?;
                            }
                            // else need to read more to get to CRLF

                            self.body_state = self
                                .body_state
                                .partial_chunk(actual_size, chunk_end_index - buf.len());
                            return Ok((
                                Some(BufRef::new(buf_index_start + payload_index, actual_size)),
                                None,
                            ));
                        }
                        /* got multiple chunks, return the first */
                        assert!(Self::validate_crlf(
                            &mut self.body_state,
                            &buf[data_end_index..chunk_end_index],
                            false,
                            false,
                        )?);
                        self.body_state = self
                            .body_state
                            .multi_chunk(chunk_size, buf_index_start + chunk_end_index);
                        Ok((
                            Some(BufRef::new(buf_index_start + payload_index, chunk_size)),
                            None,
                        ))
                    }
                    httparse::Status::Partial => {
                        if buf.len() > PARTIAL_CHUNK_HEAD_LIMIT {
                            // https://datatracker.ietf.org/doc/html/rfc9112#name-chunk-extensions
                            // "A server ought to limit the total length of chunk extensions received"
                            // The buf.len() here is the total length of chunk-size + chunk-ext seen
                            // so far. This check applies to both server and client
                            self.body_state = self.body_state.done(0);
                            Error::e_explain(INVALID_CHUNK, "Chunk ext over limit")
                        } else {
                            self.body_state =
                                self.body_state.partial_chunk_head(buf_index_end, buf.len());
                            Ok((Some(BufRef::new(0, 0)), None))
                        }
                    }
                }
            }
            Err(e) => {
                let context = format!("Invalid chunked encoding: {e:?}");
                debug!(
                    "{context}, {:?}",
                    String::from_utf8_lossy(buf).escape_default()
                );
                self.body_state = self.body_state.done(0);
                Error::e_explain(INVALID_CHUNK, context)
            }
        }
    }

    pub async fn do_read_chunked_body_final<S>(&mut self, stream: &mut S) -> Result<Option<BufRef>>
    where
        S: AsyncRead + Unpin + Send,
    {
        // parse section after last-chunk: https://datatracker.ietf.org/doc/html/rfc9112#section-7.1
        // This is the section after the final chunk we're trying to read, which can include
        // HTTP1 trailers (currently we just discard them).
        // Really we are just waiting for a consecutive CRLF + CRLF to end the body.
        match self.body_state {
            PS::ChunkedFinal(read, trailers_read, existing_buf_end, end_read) => {
                let body_buf = self.body_buf.as_deref_mut().unwrap();
                let (buf, n) = if existing_buf_end != 0 {
                    // finish rest of buf that was read with Chunked state
                    // existing_buf_end is non-zero only once
                    self.body_state = PS::ChunkedFinal(read, trailers_read, 0, end_read);
                    (&body_buf[..existing_buf_end], existing_buf_end)
                } else {
                    let n = stream
                        .read(body_buf)
                        .await
                        .or_err(ReadError, "when reading trailers end")?;

                    (&body_buf[..n], n)
                };

                if n == 0 {
                    self.body_state = PS::Done(read);
                    return Error::e_explain(
                        ConnectionClosed,
                        format!(
                            "Connection prematurely closed without the termination chunk, \
                            read {read} bytes, {trailers_read} trailer bytes"
                        ),
                    );
                }

                let mut start = 0;
                // try to find end within the current IO buffer
                while start < n {
                    // Adjusts body state through each iteration to add trailers read
                    // Each iteration finds the next CR or LF to advance the buf
                    let (trailers_read, end_read) = match self.body_state {
                        PS::ChunkedFinal(_, new_trailers_read, _, new_end_read) => {
                            (new_trailers_read, new_end_read)
                        }
                        _ => unreachable!(),
                    };

                    let mut buf = &buf[start..n];
                    trace!(
                        "Parsing chunk end for buf {:?}",
                        String::from_utf8_lossy(buf).escape_default(),
                    );

                    if end_read == 0 {
                        // find the next CRLF sequence / potential end
                        let (trailers_read, no_crlf) =
                            if let Some(p) = buf.iter().position(|b| *b == CR || *b == LF) {
                                buf = &buf[p..];
                                start += p;
                                (trailers_read + p, false)
                            } else {
                                // consider this all trailer bytes
                                (trailers_read + (n - start), true)
                            };

                        if trailers_read > TRAILER_SIZE_LIMIT {
                            self.body_state = self.body_state.done(0);
                            return Error::e_explain(
                                INVALID_TRAILER_END,
                                "Trailer size over limit",
                            );
                        }

                        self.body_state = PS::ChunkedFinal(read, trailers_read, 0, 0);

                        if no_crlf {
                            // break and allow polling read body again
                            break;
                        }
                    }
                    match Self::parse_trailers_end(&mut self.body_state, buf)? {
                        TrailersEndParseState::NotEnd(next_parse_index) => {
                            trace!(
                                "Parsing chunk end for buf {:?}, resume at {next_parse_index}",
                                String::from_utf8_lossy(buf).escape_default(),
                            );

                            start += next_parse_index;
                        }
                        TrailersEndParseState::Complete(end_idx) => {
                            trace!(
                                "Parsing chunk end for buf {:?}, finished at {end_idx}",
                                String::from_utf8_lossy(buf).escape_default(),
                            );

                            self.finish_body_buf(start + end_idx, n);
                            return Ok(None);
                        }
                    }
                }
            }
            _ => panic!("wrong body state: {:?}", self.body_state),
        }
        // indicate final section is not done
        Ok(Some(BufRef(0, 0)))
    }

    // Parses up to one CRLF at a time to determine if, given the body state,
    // we've parsed a full trailer end.
    // Panics if empty buffer is given.
    fn parse_trailers_end(
        body_state: &mut ParseState,
        buf: &[u8],
    ) -> Result<TrailersEndParseState> {
        assert!(!buf.is_empty(), "parse_trailers_end given empty buffer");

        match body_state.clone() {
            PS::ChunkedFinal(read, trailers_read, _, end_read) => {
                // Look at the body buf we just read and see if it matches
                // the ending CRLF + CRLF sequence.
                let end_read = end_read as usize;
                assert!(end_read < TRAILERS_END.len());
                let to_read = std::cmp::min(buf.len(), TRAILERS_END.len() - end_read);
                let buf = &buf[..to_read];

                // If the start of the buf is not CRLF and we are not in the middle of reading a
                // valid CRLF sequence, return to let caller seek for next CRLF
                if end_read % 2 == 0 && buf[0] != CR && buf[0] != LF {
                    trace!(
                        "parse trailers end {:?}, not CRLF sequence",
                        String::from_utf8_lossy(buf).escape_default(),
                    );
                    *body_state = PS::ChunkedFinal(read, trailers_read + end_read, 0, 0);
                    return Ok(TrailersEndParseState::NotEnd(0));
                }
                // Check for malformed CRLF in trailers (or final end of trailers section)
                let next_parse_index = match end_read {
                    0 | 2 => {
                        // expect start with CR
                        if Self::validate_crlf(body_state, buf, false, true)? {
                            // found CR + LF
                            2
                        } else {
                            // read CR at least
                            1
                        }
                    }
                    1 | 3 => {
                        // assert: only way this can return false is with an empty buffer
                        assert!(Self::validate_crlf(body_state, buf, true, true)?);
                        1
                    }
                    _ => unreachable!(),
                };
                let next_end_read = end_read + next_parse_index;
                let finished = next_end_read == TRAILERS_END.len();
                if finished {
                    trace!(
                        "parse trailers end {:?}, complete {next_end_read}",
                        String::from_utf8_lossy(buf).escape_default(),
                    );
                    *body_state = PS::Complete(read);
                    Ok(TrailersEndParseState::Complete(next_parse_index))
                } else {
                    // either we read the end of one trailer and another one follows,
                    // or trailer end CRLF sequence so far is valid but we need more bytes
                    // to determine if more CRLF actually follows
                    trace!(
                        "parse trailers end {:?}, resume at {next_parse_index}",
                        String::from_utf8_lossy(buf).escape_default(),
                    );
                    // unwrap safety for try_into() u8: next_end_read always <
                    // TRAILERS_END.len()
                    *body_state =
                        PS::ChunkedFinal(read, trailers_read, 0, next_end_read.try_into().unwrap());
                    Ok(TrailersEndParseState::NotEnd(next_parse_index))
                }
            }
            _ => panic!("wrong body state: {:?}", body_state),
        }
    }

    // Validates that the starting bytes of `buf` are the expected CRLF bytes.
    // Expects: buf that starts at the indices where CRLF should be for chunked bodies.
    // If need_lf_only, we will only check for LF, else we will check starting with CR.
    //
    // Returns Ok() if buf begins with expected bytes (CR, LF, or CRLF).
    // The inner bool returned is whether the whole CRLF sequence was completed.
    fn validate_crlf(
        body_state: &mut ParseState,
        buf: &[u8],
        need_lf_only: bool,
        for_trailer_end: bool,
    ) -> Result<bool> {
        let etype = if for_trailer_end {
            INVALID_TRAILER_END
        } else {
            INVALID_CHUNK
        };
        if need_lf_only {
            if buf.is_empty() {
                Ok(false)
            } else {
                let b = &buf[..1];
                if b == b"\n" {
                    // only LF left
                    Ok(true)
                } else {
                    *body_state = body_state.done(0);
                    Error::e_explain(
                        etype,
                        format!(
                            "Invalid chunked encoding: {} was not LF",
                            String::from_utf8_lossy(b).escape_default(),
                        ),
                    )
                }
            }
        } else {
            match buf.len() {
                0 => Ok(false),
                1 => {
                    let b = &buf[..1];
                    if b == b"\r" {
                        Ok(false)
                    } else {
                        *body_state = body_state.done(0);
                        Error::e_explain(
                            etype,
                            format!(
                                "Invalid chunked encoding: {} was not CR",
                                String::from_utf8_lossy(b).escape_default(),
                            ),
                        )
                    }
                }
                _ => {
                    let b = &buf[..2];
                    if b == b"\r\n" {
                        Ok(true)
                    } else {
                        *body_state = body_state.done(0);
                        Error::e_explain(
                            etype,
                            format!(
                                "Invalid chunked encoding: {} was not CRLF",
                                String::from_utf8_lossy(b).escape_default(),
                            ),
                        )
                    }
                }
            }
        }
    }
}

pub enum TrailersEndParseState {
    NotEnd(usize),   // start of bytes after CR or LF bytes
    Complete(usize), // index of message completion
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum BodyMode {
    ToSelect,
    ContentLength(usize, usize), // total length to write, bytes already written
    ChunkedEncoding(usize),      //bytes written
    UntilClose(usize),           //bytes written
    Complete(usize),             //bytes written
}

type BM = BodyMode;

pub struct BodyWriter {
    pub body_mode: BodyMode,
}

impl BodyWriter {
    pub fn new() -> Self {
        BodyWriter {
            body_mode: BM::ToSelect,
        }
    }

    pub fn init_chunked(&mut self) {
        self.body_mode = BM::ChunkedEncoding(0);
    }

    pub fn init_close_delimited(&mut self) {
        self.body_mode = BM::UntilClose(0);
    }

    pub fn init_content_length(&mut self, cl: usize) {
        self.body_mode = BM::ContentLength(cl, 0);
    }

    pub fn convert_to_close_delimited(&mut self) {
        if matches!(self.body_mode, BodyMode::UntilClose(_)) {
            // nothing to do, already in close-delimited mode
            return;
        }

        // NOTE: any stream buffered data will be flushed in next
        // close-delimited write
        // reset body state to close-delimited (UntilClose)
        self.body_mode = BM::UntilClose(0);
    }

    // NOTE on buffering/flush stream when writing the body
    // Buffering writes can reduce the syscalls hence improves efficiency of the system
    // But it hurts real time communication
    // So we only allow buffering when the body size is known ahead, which is less likely
    // to be real time interaction

    pub async fn write_body<S>(&mut self, stream: &mut S, buf: &[u8]) -> Result<Option<usize>>
    where
        S: AsyncWrite + Unpin + Send,
    {
        trace!("Writing Body, size: {}", buf.len());
        match self.body_mode {
            BM::Complete(_) => Ok(None),
            BM::ContentLength(_, _) => self.do_write_body(stream, buf).await,
            BM::ChunkedEncoding(_) => self.do_write_chunked_body(stream, buf).await,
            BM::UntilClose(_) => self.do_write_until_close_body(stream, buf).await,
            BM::ToSelect => Ok(None), // Error here?
        }
    }

    pub fn finished(&self) -> bool {
        match self.body_mode {
            BM::Complete(_) => true,
            BM::ContentLength(total, written) => written >= total,
            _ => false,
        }
    }

    pub fn is_close_delimited(&self) -> bool {
        matches!(self.body_mode, BM::UntilClose(_))
    }

    async fn do_write_body<S>(&mut self, stream: &mut S, buf: &[u8]) -> Result<Option<usize>>
    where
        S: AsyncWrite + Unpin + Send,
    {
        match self.body_mode {
            BM::ContentLength(total, written) => {
                if written >= total {
                    // already written full length
                    return Ok(None);
                }
                let mut to_write = total - written;
                if to_write < buf.len() {
                    warn!("Trying to write data over content-length: {total}");
                } else {
                    to_write = buf.len();
                }
                let res = stream.write_all(&buf[..to_write]).await;
                match res {
                    Ok(()) => {
                        self.body_mode = BM::ContentLength(total, written + to_write);
                        if self.finished() {
                            stream.flush().await.or_err(WriteError, "flushing body")?;
                        }
                        Ok(Some(to_write))
                    }
                    Err(e) => Error::e_because(WriteError, "while writing body", e),
                }
            }
            _ => panic!("wrong body mode: {:?}", self.body_mode),
        }
    }

    async fn do_write_chunked_body<S>(
        &mut self,
        stream: &mut S,
        buf: &[u8],
    ) -> Result<Option<usize>>
    where
        S: AsyncWrite + Unpin + Send,
    {
        match self.body_mode {
            BM::ChunkedEncoding(written) => {
                let chunk_size = buf.len();

                let chuck_size_buf = format!("{:X}\r\n", chunk_size);
                let mut output_buf = Bytes::from(chuck_size_buf).chain(buf).chain(&b"\r\n"[..]);
                stream
                    .write_vec_all(&mut output_buf)
                    .await
                    .or_err(WriteError, "while writing body")?;
                stream.flush().await.or_err(WriteError, "flushing body")?;
                self.body_mode = BM::ChunkedEncoding(written + chunk_size);
                Ok(Some(chunk_size))
            }
            _ => panic!("wrong body mode: {:?}", self.body_mode),
        }
    }

    async fn do_write_until_close_body<S>(
        &mut self,
        stream: &mut S,
        buf: &[u8],
    ) -> Result<Option<usize>>
    where
        S: AsyncWrite + Unpin + Send,
    {
        match self.body_mode {
            BM::UntilClose(written) => {
                let res = stream.write_all(buf).await;
                match res {
                    Ok(()) => {
                        self.body_mode = BM::UntilClose(written + buf.len());
                        stream.flush().await.or_err(WriteError, "flushing body")?;
                        Ok(Some(buf.len()))
                    }
                    Err(e) => Error::e_because(WriteError, "while writing body", e),
                }
            }
            _ => panic!("wrong body mode: {:?}", self.body_mode),
        }
    }

    pub async fn finish<S>(&mut self, stream: &mut S) -> Result<Option<usize>>
    where
        S: AsyncWrite + Unpin + Send,
    {
        match self.body_mode {
            BM::Complete(_) => Ok(None),
            BM::ContentLength(_, _) => self.do_finish_body(stream),
            BM::ChunkedEncoding(_) => self.do_finish_chunked_body(stream).await,
            BM::UntilClose(_) => self.do_finish_until_close_body(stream),
            BM::ToSelect => Ok(None),
        }
    }

    fn do_finish_body<S>(&mut self, _stream: S) -> Result<Option<usize>> {
        match self.body_mode {
            BM::ContentLength(total, written) => {
                self.body_mode = BM::Complete(written);
                if written < total {
                    return Error::e_explain(
                        PREMATURE_BODY_END,
                        format!("Content-length: {total} bytes written: {written}"),
                    );
                }
                Ok(Some(written))
            }
            _ => panic!("wrong body mode: {:?}", self.body_mode),
        }
    }

    async fn do_finish_chunked_body<S>(&mut self, stream: &mut S) -> Result<Option<usize>>
    where
        S: AsyncWrite + Unpin + Send,
    {
        match self.body_mode {
            BM::ChunkedEncoding(written) => {
                let res = stream.write_all(&LAST_CHUNK[..]).await;
                self.body_mode = BM::Complete(written);
                match res {
                    Ok(()) => Ok(Some(written)),
                    Err(e) => Error::e_because(WriteError, "while writing body", e),
                }
            }
            _ => panic!("wrong body mode: {:?}", self.body_mode),
        }
    }

    fn do_finish_until_close_body<S>(&mut self, _stream: &mut S) -> Result<Option<usize>> {
        match self.body_mode {
            BM::UntilClose(written) => {
                self.body_mode = BM::Complete(written);
                Ok(Some(written))
            }
            _ => panic!("wrong body mode: {:?}", self.body_mode),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use tokio_test::io::Builder;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[tokio::test]
    async fn read_with_body_content_length() {
        init_log();
        let input = b"abc";
        let mut mock_io = Builder::new().read(&input[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_content_length(3, b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 3));
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(input, body_reader.get_body(&res));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_content_length_2() {
        init_log();
        let input1 = b"a";
        let input2 = b"bc";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_content_length(3, b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 1));
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));
        assert_eq!(input1, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 2));
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(input2, body_reader.get_body(&res));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_content_length_less() {
        init_log();
        let input1 = b"a";
        let input2 = b""; // simulating close
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_content_length(3, b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 1));
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));
        assert_eq!(input1, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(&ConnectionClosed, res.etype());
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_content_length_more() {
        init_log();
        let input1 = b"a";
        let input2 = b"bcd";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_content_length(3, b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 1));
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));
        assert_eq!(input1, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 2));
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(&input2[0..2], body_reader.get_body(&res));
        // read remaining data
        body_reader.init_content_length(1, b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(&input2[2..], body_reader.get_body(&res));
    }

    #[tokio::test]
    async fn read_with_body_content_length_overread() {
        init_log();
        let input1 = b"a";
        let input2 = b"bcd";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(true);
        body_reader.init_content_length(3, b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 1));
        assert_eq!(body_reader.body_state, ParseState::Partial(1, 2));
        assert_eq!(input1, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 2));
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(&input2[0..2], body_reader.get_body(&res));
        assert_eq!(body_reader.get_body_overread(), Some(&b"d"[..]));
    }

    #[tokio::test]
    async fn read_with_body_content_length_rewind() {
        init_log();
        let rewind = b"ab";
        let input = b"c";
        let mut mock_io = Builder::new().read(&input[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_content_length(3, rewind);
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 2));
        assert_eq!(body_reader.body_state, ParseState::Partial(2, 1));
        assert_eq!(rewind, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 1));
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(input, body_reader.get_body(&res));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_http10() {
        init_log();
        let input1 = b"a";
        let input2 = b""; // simulating close
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_close_delimited(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 1));
        assert_eq!(body_reader.body_state, ParseState::UntilClose(1));
        assert_eq!(input1, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_http10_rewind() {
        init_log();
        let rewind = b"ab";
        let input1 = b"c";
        let input2 = b""; // simulating close
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_close_delimited(rewind);
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 2));
        assert_eq!(body_reader.body_state, ParseState::UntilClose(2));
        assert_eq!(rewind, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 1));
        assert_eq!(body_reader.body_state, ParseState::UntilClose(3));
        assert_eq!(input1, body_reader.get_body(&res));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_zero_chunk() {
        init_log();
        let input = b"0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_zero_chunk_malformed() {
        init_log();
        let input = b"0\r\nr\n";
        let mut mock_io = Builder::new().read(&input[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(0, 0, 2, 2));

        // \n without leading \r
        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_TRAILER_END);
        assert_eq!(body_reader.body_state, ParseState::Done(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_zero_chunk_split() {
        init_log();
        let input1 = b"0\r\n";
        let input2 = b"\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(0, 0, 0, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_zero_chunk_split_head() {
        init_log();
        let input1 = b"0\r";
        let input2 = b"\n";
        let input3 = b"\r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(0, 0, 0, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_zero_chunk_split_head_2() {
        init_log();
        let input1 = b"0";
        let input2 = b"\r\n";
        let input3 = b"\r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 1, 1));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(0, 0, 0, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_zero_chunk_split_head_3() {
        init_log();
        let input1 = b"0\r";
        let input2 = b"\n";
        let input3 = b"\r";
        let input4 = b"\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .read(&input4[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(0, 0, 0, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(0, 0, 0, 3));

        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_chunk_ext() {
        init_log();
        let input = b"0;aaaa\r\n\r\n";
        let mut mock_io = Builder::new().read(&input[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_chunk_ext_oversize() {
        init_log();
        let chunk_size = b"0;";
        let ext1 = [b'a'; 1024 * 5];
        let ext2 = [b'a'; 1024 * 3];
        let mut mock_io = Builder::new()
            .read(&chunk_size[..])
            .read(&ext1[..])
            .read(&ext2[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        // read chunk-size, chunk incomplete
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, Some(BufRef::new(0, 0)));
        // read ext1, chunk incomplete
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, Some(BufRef::new(0, 0)));
        // read ext2, now oversized
        let res = body_reader.read_body(&mut mock_io).await;
        assert!(res.is_err());
        assert_eq!(body_reader.body_state, ParseState::Done(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_1_chunk() {
        init_log();
        let input1 = b"1\r\na\r\n";
        let input2 = b"0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 0, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_1_chunk_malformed() {
        init_log();
        let input1 = b"1\r\na\rn";
        let mut mock_io = Builder::new().read(&input1[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");

        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_CHUNK);
        assert_eq!(body_reader.body_state, ParseState::Done(0));

        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_1_chunk_partial_end() {
        init_log();
        let input1 = b"1\r\na\r";
        let input2 = b"\n0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 0, 1));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 1, 6, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_1_chunk_partial_end_1() {
        init_log();
        let input1 = b"3\r\n";
        let input2 = b"abc\r";
        let input3 = b"\n0\r\n\r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 0));
        assert_eq!(b"", body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 0, 5));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 3));
        assert_eq!(&input2[0..3], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 0, 0, 1));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_1_chunk_partial_end_2() {
        init_log();
        let input1 = b"3\r\n";
        let input2 = b"abc";
        let input3 = b"\r\n0\r\n\r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 0));
        assert_eq!(b"", body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 0, 5));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 3));
        assert_eq!(&input2[0..3], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 0, 0, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_1_chunk_incomplete() {
        init_log();
        let input1 = b"1\r\na\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 0, 0));
        let res = body_reader.read_body(&mut mock_io).await;
        assert!(res.is_err());
        assert_eq!(body_reader.body_state, ParseState::Done(1));
    }

    #[tokio::test]
    async fn read_with_body_1_chunk_partial_end_malformed() {
        init_log();
        let input1 = b"1\r\na\r";
        let input2 = b"n0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 0, 1));
        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_CHUNK);
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_1_chunk_rewind() {
        init_log();
        let rewind = b"1\r\nx\r\n";
        let input1 = b"1\r\na\r\n";
        let input2 = b"0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(rewind);
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&rewind[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 0, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(2, 0, 0, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(2));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_multi_chunk() {
        init_log();
        let input1 = b"1\r\na\r\n2\r\nbc\r\n";
        let input2 = b"0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 13, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(9, 2));
        assert_eq!(&input1[9..11], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 0, 0, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_multi_chunk_malformed() {
        init_log();
        let input1 = b"1\r\na\r\n2\r\nbcr\n";
        let mut mock_io = Builder::new().read(&input1[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");

        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 13, 0));

        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_CHUNK);
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);

        let input1 = b"1\r\nar\n2\r\nbc\rn";
        let mut mock_io = Builder::new().read(&input1[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");

        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_CHUNK);
        assert_eq!(body_reader.body_state, ParseState::Done(0));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_chunk() {
        init_log();
        let input1 = b"3\r\na";
        let input2 = b"bc\r\n0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 0, 4));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 2));
        assert_eq!(&input2[0..2], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 4, 9, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_chunk_end() {
        init_log();
        let input1 = b"3\r\nabc";
        let input2 = b"\r\n0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 3));
        assert_eq!(&input1[3..6], body_reader.get_body(&res));
        // \r\n (2 bytes) left to read from IO
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 0, 0, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(&input2[0..0], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 2, 7, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_head_chunk() {
        init_log();
        let input1 = b"1\r";
        let input2 = b"\na\r\n0\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1)); // input1 concat input2
        assert_eq!(&input2[1..2], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 11, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_head_terminal_crlf() {
        init_log();
        let input1 = b"1\r";
        let input2 = b"\na\r\n0\r\n\r";
        let input3 = b"\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1)); // input1 concat input2
        assert_eq!(&input2[1..2], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 10, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0)); // only part of terminal crlf, one more byte to read
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 1, 2));
        // TODO: can optimize this to avoid the second read_body call
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 0, 3));

        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_head_terminal_crlf_2() {
        init_log();
        let input1 = b"1\r";
        let input2 = b"\na\r\n0\r";
        let input3 = b"\n\r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1)); // input1 concat input2
        assert_eq!(&input2[1..2], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 8, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0)); // only part of terminal crlf, one more byte to read
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 8, 2));
        // optimized to go right to complete state
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_head_terminal_crlf_3() {
        init_log();
        let input1 = b"1\r\na\r\n0";
        let input2 = b"\r";
        let input3 = b"\n";
        let input4 = b"\r";
        let input5 = b"\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .read(&input4[..])
            .read(&input5[..])
            .build();

        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 7, 0));
        // to 0
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 7, 1));
        // \r
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 2, 2));
        // \n
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 0, 2));
        // \r
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 0, 3));
        // \n
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_head_terminal_crlf_malformed() {
        init_log();
        let input1 = b"1\r";
        let input2 = b"\na\r\n0\r\nr";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");

        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));

        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1)); // input1 concat input2
        assert_eq!(&input2[1..2], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 10, 0));

        // TODO: may be able to optimize this extra read_body out
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 1, 2));
        // "r" is interpreted as a hanging trailer
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 3, 0, 0));

        let res = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(&ConnectionClosed, res.etype());
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_partial_head_terminal_crlf_overread() {
        init_log();
        let input1 = b"1\r";
        let input2 = b"\na\r\n0\r\n\r";
        let input3 = b"\nabcd";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1)); // input1 concat input2
        assert_eq!(&input2[1..2], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 10, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0)); // read only part of terminal crlf
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 1, 2));
        // TODO: can optimize this to avoid the second read_body call
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 0, 3));

        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), Some(&b"abcd"[..]));
    }

    #[tokio::test]
    async fn read_with_body_multi_chunk_overread() {
        init_log();
        let input1 = b"1\r\na\r\n2\r\nbc\r\n";
        let input2 = b"0\r\n\r\nabc";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 13, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(9, 2));
        assert_eq!(&input1[9..11], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 0, 0, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), Some(&b"abc"[..]));
    }

    #[tokio::test]
    async fn read_with_body_partial_head_chunk_incomplete() {
        init_log();
        let input1 = b"1\r";
        let mut mock_io = Builder::new().read(&input1[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(0, 0, 2, 2));
        let res = body_reader.read_body(&mut mock_io).await;
        assert!(res.is_err());
        assert_eq!(body_reader.body_state, ParseState::Done(0));
    }

    #[tokio::test]
    async fn read_with_body_trailers() {
        init_log();
        let input1 = b"1\r\na\r\n2\r\nbc\r\n";
        let input2 = b"0\r\nabc: hi";
        let input3 = b"\r\ndef: bye\r";
        let input4 = b"\nghi: more\r\n";
        let input5 = b"\r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .read(&input4[..])
            .read(&input5[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 13, 0));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(9, 2));
        assert_eq!(&input1[9..11], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(3, 0, 0, 0));
        // abc: hi
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(3, 0, 7, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            // NOTE: 0 chunk-size CRLF counted in trailer size too
            ParseState::ChunkedFinal(3, 9, 0, 0)
        );
        // def: bye
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            ParseState::ChunkedFinal(3, 19, 0, 1)
        );
        // ghi: more
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            ParseState::ChunkedFinal(3, 30, 0, 2)
        );

        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(3));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_trailers_2() {
        init_log();
        let input1 = b"1\r\na\r\n0\r";
        let input2 = b"\nabc: hi\r\n\r\n";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 8, 0));
        // 0 \r
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 8, 2));
        // \n TODO: optimize this call out
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            ParseState::ChunkedFinal(1, 0, 11, 2)
        );
        // abc: hi with end in same read
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_trailers_3() {
        init_log();
        let input1 = b"1\r\na\r\n0\r";
        let input2 = b"\nabc: hi";
        let input3 = b"\r\n\r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 8, 0));
        // 0 \r
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 8, 2));
        // \n TODO: optimize this call out
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 7, 2));
        // abc: hi
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            // NOTE: 0 chunk-size CRLF counted in trailer size too
            ParseState::ChunkedFinal(1, 9, 0, 0)
        );
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_trailers_4() {
        init_log();
        let input1 = b"1\r\na\r\n0\r";
        let input2 = b"\nabc: hi\r\n\r";
        let input3 = b"\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 8, 0));
        // 0 \r
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 8, 2));
        // \n TODO: optimize this call out
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            ParseState::ChunkedFinal(1, 0, 10, 2)
        );
        // abc: hi
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            // NOTE: 0 chunk-size CRLF counted in trailer size too
            ParseState::ChunkedFinal(1, 9, 0, 3)
        );
        let res = body_reader.read_body(&mut mock_io).await.unwrap();
        assert_eq!(res, None);
        assert_eq!(body_reader.body_state, ParseState::Complete(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_trailers_malformed() {
        init_log();
        let input1 = b"1\r\na\r\n0\r";
        let input2 = b"\nabc: hi\rn";
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 8, 0));
        // 0 \r
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 8, 2));
        // abc: hi to \rn
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 9, 2));
        // \rn not valid
        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_TRAILER_END);
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_trailers_malformed_2() {
        init_log();
        let input1 = b"1\r\na\r\n0\r";
        let input2 = b"\nabc: hi\r\n";
        // no end
        let mut mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 8, 0));
        // 0 \r
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 0, 8, 2));
        // abc: hi to \r\n
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 9, 2));
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 9, 0, 2));
        // EOF
        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), ConnectionClosed);
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_trailers_malformed_3() {
        init_log();
        let input1 = b"1\r\na\r\n0\r\n";
        let input2 = b"abc: hi\r\n";
        let input3 = b"r\n";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 9, 0));
        // 0 \r\n
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 0, 2));
        // abc: hi
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 9, 0, 2));
        // r\n not valid
        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_TRAILER_END);
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn read_with_body_trailers_overflow() {
        init_log();
        let input1 = b"1\r\na\r\n0\r\n";
        let input2 = b"abc: ";
        let trailer1 = [b'a'; 1024 * 60];
        let trailer2 = [b'a'; 1024 * 5];
        let input3 = b"defghi: ";
        let mut mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&trailer1[..])
            .read(&CRLF[..])
            .read(&input3[..])
            .read(&trailer2[..])
            .build();
        let mut body_reader = BodyReader::new(false);
        body_reader.init_chunked(b"");
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(3, 1));
        assert_eq!(&input1[3..4], body_reader.get_body(&res));
        assert_eq!(body_reader.body_state, ParseState::Chunked(1, 6, 9, 0));
        // 0 \r\n
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 0, 0, 2));
        // abc:
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(body_reader.body_state, ParseState::ChunkedFinal(1, 7, 0, 0));
        // aaa...
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            ParseState::ChunkedFinal(1, 1024 * 60 + 7, 0, 0)
        );
        // CRLF
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            ParseState::ChunkedFinal(1, 1024 * 60 + 7, 0, 2)
        );
        // defghi:
        let res = body_reader.read_body(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 0));
        assert_eq!(
            body_reader.body_state,
            ParseState::ChunkedFinal(1, 1024 * 60 + 17, 0, 0)
        );
        // overflow
        let e = body_reader.read_body(&mut mock_io).await.unwrap_err();
        assert_eq!(*e.etype(), INVALID_TRAILER_END);
        assert_eq!(body_reader.body_state, ParseState::Done(1));
        assert_eq!(body_reader.get_body_overread(), None);
    }

    #[tokio::test]
    async fn write_body_cl() {
        init_log();
        let output = b"a";
        let mut mock_io = Builder::new().write(&output[..]).build();
        let mut body_writer = BodyWriter::new();
        body_writer.init_content_length(1);
        assert_eq!(body_writer.body_mode, BodyMode::ContentLength(1, 0));
        let res = body_writer
            .write_body(&mut mock_io, &output[..])
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::ContentLength(1, 1));
        // write again, over the limit
        let res = body_writer
            .write_body(&mut mock_io, &output[..])
            .await
            .unwrap();
        assert_eq!(res, None);
        assert_eq!(body_writer.body_mode, BodyMode::ContentLength(1, 1));
        let res = body_writer.finish(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::Complete(1));
    }

    #[tokio::test]
    async fn write_body_chunked() {
        init_log();
        let data = b"abcdefghij";
        let output = b"A\r\nabcdefghij\r\n";
        let mut mock_io = Builder::new()
            .write(&output[..])
            .write(&output[..])
            .write(&LAST_CHUNK[..])
            .build();
        let mut body_writer = BodyWriter::new();
        body_writer.init_chunked();
        assert_eq!(body_writer.body_mode, BodyMode::ChunkedEncoding(0));
        let res = body_writer
            .write_body(&mut mock_io, &data[..])
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, data.len());
        assert_eq!(body_writer.body_mode, BodyMode::ChunkedEncoding(data.len()));
        let res = body_writer
            .write_body(&mut mock_io, &data[..])
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, data.len());
        assert_eq!(
            body_writer.body_mode,
            BodyMode::ChunkedEncoding(data.len() * 2)
        );
        let res = body_writer.finish(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, data.len() * 2);
        assert_eq!(body_writer.body_mode, BodyMode::Complete(data.len() * 2));
    }

    #[tokio::test]
    async fn write_body_http10() {
        init_log();
        let data = b"a";
        let mut mock_io = Builder::new().write(&data[..]).write(&data[..]).build();
        let mut body_writer = BodyWriter::new();
        body_writer.init_close_delimited();
        assert_eq!(body_writer.body_mode, BodyMode::UntilClose(0));
        let res = body_writer
            .write_body(&mut mock_io, &data[..])
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::UntilClose(1));
        let res = body_writer
            .write_body(&mut mock_io, &data[..])
            .await
            .unwrap()
            .unwrap();
        assert_eq!(res, 1);
        assert_eq!(body_writer.body_mode, BodyMode::UntilClose(2));
        let res = body_writer.finish(&mut mock_io).await.unwrap().unwrap();
        assert_eq!(res, 2);
        assert_eq!(body_writer.body_mode, BodyMode::Complete(2));
    }
}


================================================
FILE: pingora-core/src/protocols/http/v1/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP/1.x client session

use bytes::{BufMut, Bytes, BytesMut};
use http::{header, header::AsHeaderName, HeaderValue, StatusCode, Version};
use log::{debug, trace};
use pingora_error::{Error, ErrorType::*, OrErr, Result, RetryType};
use pingora_http::{HMap, IntoCaseHeaderName, RequestHeader, ResponseHeader};
use pingora_timeout::timeout;
use std::io::ErrorKind;
use std::str;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};

use super::body::{BodyReader, BodyWriter};
use super::common::*;
use crate::protocols::http::HttpTask;
use crate::protocols::{Digest, SocketAddr, Stream, UniqueID, UniqueIDType};
use crate::utils::{BufRef, KVRef};

/// The HTTP 1.x client session
pub struct HttpSession {
    buf: Bytes,
    pub(crate) underlying_stream: Stream,
    raw_header: Option<BufRef>,
    preread_body: Option<BufRef>,
    body_reader: BodyReader,
    body_writer: BodyWriter,
    // timeouts:
    /// The read timeout, which will be applied to both reading the header and the body.
    /// The timeout is reset on every read. This is not a timeout on the overall duration of the
    /// response.
    pub read_timeout: Option<Duration>,
    /// The write timeout which will be applied to both writing request header and body.
    /// The timeout is reset on every write. This is not a timeout on the overall duration of the
    /// request.
    pub write_timeout: Option<Duration>,
    keepalive_timeout: KeepaliveStatus,
    pub(crate) digest: Box<Digest>,
    response_header: Option<Box<ResponseHeader>>,
    request_written: Option<Box<RequestHeader>>,
    bytes_sent: usize,
    /// Total response body payload bytes received from upstream
    body_recv: usize,
    // Tracks whether upgrade handshake was successfully completed
    upgraded: bool,
    // Tracks whether downstream request body started sending upgraded bytes
    received_upgrade_req_body: bool,
    // Tracks whether the response read was ever close-delimited
    // (even after body complete)
    close_delimited_resp: bool,
    // If allowed, does not fail with error on invalid content-length
    // (treats as close-delimited response).
    allow_h1_response_invalid_content_length: bool,
}

/// HTTP 1.x client session
impl HttpSession {
    /// Create a new http client session from an established (TCP or TLS) [`Stream`].
    pub fn new(stream: Stream) -> Self {
        // TODO: maybe we should put digest in the connection itself
        let digest = Box::new(Digest {
            ssl_digest: stream.get_ssl_digest(),
            timing_digest: stream.get_timing_digest(),
            proxy_digest: stream.get_proxy_digest(),
            socket_digest: stream.get_socket_digest(),
        });
        HttpSession {
            underlying_stream: stream,
            buf: Bytes::new(), // zero size, will be replaced by parsed header later
            raw_header: None,
            preread_body: None,
            body_reader: BodyReader::new(true),
            body_writer: BodyWriter::new(),
            keepalive_timeout: KeepaliveStatus::Off,
            response_header: None,
            request_written: None,
            read_timeout: None,
            write_timeout: None,
            digest,
            bytes_sent: 0,
            body_recv: 0,
            upgraded: false,
            received_upgrade_req_body: false,
            close_delimited_resp: false,
            allow_h1_response_invalid_content_length: false,
        }
    }

    /// Create a new http client session and apply peer options
    pub fn new_with_options<P: crate::upstreams::peer::Peer>(stream: Stream, peer: &P) -> Self {
        let mut session = Self::new(stream);
        if let Some(options) = peer.get_peer_options() {
            session.set_allow_h1_response_invalid_content_length(
                options.allow_h1_response_invalid_content_length,
            );
        }
        session
    }

    /// Write the request header to the server
    /// After the request header is sent. The caller can either start reading the response or
    /// sending request body if any.
    pub async fn write_request_header(&mut self, req: Box<RequestHeader>) -> Result<usize> {
        // TODO: make sure this can only be called once
        // init body writer
        self.init_req_body_writer(&req);

        let to_wire = http_req_header_to_wire(&req).unwrap();
        trace!("Writing request header: {to_wire:?}");

        let write_fut = self.underlying_stream.write_all(to_wire.as_ref());
        match self.write_timeout {
            Some(t) => match timeout(t, write_fut).await {
                Ok(res) => res,
                Err(_) => Err(std::io::Error::from(ErrorKind::TimedOut)),
            },
            None => write_fut.await,
        }
        .map_err(|e| match e.kind() {
            ErrorKind::TimedOut => {
                Error::because(WriteTimedout, "while writing request headers (timeout)", e)
            }
            _ => Error::because(WriteError, "while writing request headers", e),
        })?;

        self.underlying_stream
            .flush()
            .await
            .or_err(WriteError, "flushing request header")?;

        // write was successful
        self.request_written = Some(req);
        Ok(to_wire.len())
    }

    async fn do_write_body(&mut self, buf: &[u8]) -> Result<Option<usize>> {
        let written = self
            .body_writer
            .write_body(&mut self.underlying_stream, buf)
            .await;

        if let Ok(Some(num_bytes)) = written {
            self.bytes_sent += num_bytes;
        }

        written
    }

    /// Write request body. Return Ok(None) if no more body should be written, either due to
    /// Content-Length or the last chunk is already sent
    pub async fn write_body(&mut self, buf: &[u8]) -> Result<Option<usize>> {
        // TODO: verify that request header is sent already
        match self.write_timeout {
            Some(t) => match timeout(t, self.do_write_body(buf)).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(WriteTimedout, format!("writing body, timeout: {t:?}")),
            },
            None => self.do_write_body(buf).await,
        }
    }

    fn maybe_force_close_body_reader(&mut self) {
        if self.upgraded && self.received_upgrade_req_body && !self.body_reader.body_done() {
            // request is done, reset the response body to close
            self.body_reader.init_content_length(0, b"");
        }
    }

    /// Flush local buffer and notify the server by sending the last chunk if chunked encoding is
    /// used.
    pub async fn finish_body(&mut self) -> Result<Option<usize>> {
        let res = self.body_writer.finish(&mut self.underlying_stream).await?;
        self.underlying_stream
            .flush()
            .await
            .or_err(WriteError, "flushing body")?;

        self.maybe_force_close_body_reader();
        Ok(res)
    }

    // Validate the response header read. This function must be called after the response header
    // read.
    fn validate_response(&self) -> Result<()> {
        let resp_header = self
            .response_header
            .as_ref()
            .expect("response header must be read");

        // ad-hoc checks
        super::common::check_dup_content_length(&resp_header.headers)?;

        // Validate content-length value if present
        // Note: Content-Length is already removed if Transfer-Encoding is present
        if !self.allow_h1_response_invalid_content_length {
            self.get_content_length()?;
        }

        Ok(())
    }

    /// Read the response header from the server
    /// This function can be called multiple times, if the headers received are just informational
    /// headers.
    pub async fn read_response(&mut self) -> Result<usize> {
        if self.preread_body.as_ref().is_none_or(|b| b.is_empty()) {
            // preread_body is set after a completed valid response header is read
            // if called multiple times (i.e. after informational responses),
            // we want to parse the already read buffer bytes as more headers.
            // (https://datatracker.ietf.org/doc/html/rfc9110#section-15.2
            // "A 1xx response is terminated by the end of the header section;
            // it cannot contain content or trailers.")
            // If this next read_response call completes successfully,
            // self.buf will be reset to the last response + any body.
            self.buf.clear();
        }
        let mut buf = BytesMut::with_capacity(INIT_HEADER_BUF_SIZE);
        let mut already_read: usize = 0;
        loop {
            if already_read > MAX_HEADER_SIZE {
                /* NOTE: this check only blocks second read. The first large read is allowed
                since the buf is already allocated. The goal is to avoid slowly bloating
                this buffer */
                return Error::e_explain(
                    InvalidHTTPHeader,
                    format!("Response header larger than {MAX_HEADER_SIZE}"),
                );
            }

            let preread = self.preread_body.take();
            let read_result = if let Some(preread) = preread.filter(|b| !b.is_empty()) {
                buf.put_slice(preread.get(&self.buf));
                Ok(preread.len())
            } else {
                let read_fut = self.underlying_stream.read_buf(&mut buf);
                match self.read_timeout {
                    Some(t) => timeout(t, read_fut).await.map_err(|_| {
                        Error::explain(ReadTimedout, "while reading response headers")
                    })?,
                    None => read_fut.await,
                }
            };
            let n = match read_result {
                Ok(n) => match n {
                    0 => {
                        let mut e = Error::explain(
                            ConnectionClosed,
                            format!(
                                "while reading response headers, bytes already read: {already_read}",
                            ),
                        );
                        e.retry = RetryType::ReusedOnly;
                        return Err(e);
                    }
                    _ => {
                        n /* read n bytes, continue */
                    }
                },
                Err(e) => {
                    let true_io_error = e.raw_os_error().is_some();
                    let mut e = Error::because(
                        ReadError,
                        format!(
                            "while reading response headers, bytes already read: {already_read}",
                        ),
                        e,
                    );
                    // Likely OSError, typical if a previously reused connection drops it
                    if true_io_error {
                        e.retry = RetryType::ReusedOnly;
                    } // else: not safe to retry TLS error
                    return Err(e);
                }
            };
            already_read += n;
            let mut headers = [httparse::EMPTY_HEADER; MAX_HEADERS];
            let mut resp = httparse::Response::new(&mut headers);
            let parsed = parse_resp_buffer(&mut resp, &buf);
            match parsed {
                HeaderParseState::Complete(s) => {
                    self.raw_header = Some(BufRef(0, s));
                    self.preread_body = Some(BufRef(s, already_read));
                    let base = buf.as_ptr() as usize;
                    let mut header_refs = Vec::<KVRef>::with_capacity(resp.headers.len());

                    // Note: resp.headers has the correct number of headers
                    // while header_refs doesn't as it is still empty
                    let _num_headers = populate_headers(base, &mut header_refs, resp.headers);

                    let mut response_header = Box::new(ResponseHeader::build(
                        resp.code.unwrap(),
                        Some(resp.headers.len()),
                    )?);

                    // TODO: enforce https://datatracker.ietf.org/doc/html/rfc9110#section-15.2
                    // "Since HTTP/1.0 did not define any 1xx status codes,
                    // a server MUST NOT send a 1xx response to an HTTP/1.0 client."
                    response_header.set_version(match resp.version {
                        Some(1) => Version::HTTP_11,
                        Some(0) => Version::HTTP_10,
                        _ => Version::HTTP_09,
                    });

                    response_header.set_reason_phrase(resp.reason)?;

                    let buf = buf.freeze();

                    for header in header_refs {
                        let header_name = header.get_name_bytes(&buf);
                        let header_name = header_name.into_case_header_name();
                        let value_bytes = header.get_value_bytes(&buf);
                        let header_value = if cfg!(debug_assertions) {
                            // from_maybe_shared_unchecked() in debug mode still checks whether
                            // the header value is valid, which breaks the _obsolete_multiline
                            // support. To work around this, in debug mode, we replace CRLF with
                            // whitespace
                            if let Some(p) = value_bytes.windows(CRLF.len()).position(|w| w == CRLF)
                            {
                                let mut new_header = Vec::from_iter(value_bytes);
                                new_header[p] = b' ';
                                new_header[p + 1] = b' ';
                                unsafe {
                                    http::HeaderValue::from_maybe_shared_unchecked(new_header)
                                }
                            } else {
                                unsafe {
                                    http::HeaderValue::from_maybe_shared_unchecked(value_bytes)
                                }
                            }
                        } else {
                            // safe because this is from what we parsed
                            unsafe { http::HeaderValue::from_maybe_shared_unchecked(value_bytes) }
                        };
                        response_header
                            .append_header(header_name, header_value)
                            .or_err(InvalidHTTPHeader, "while parsing request header")?;
                    }

                    let contains_transfer_encoding = response_header
                        .headers
                        .contains_key(header::TRANSFER_ENCODING);
                    let contains_content_length =
                        response_header.headers.contains_key(header::CONTENT_LENGTH);

                    // Transfer encoding overrides content length, so when
                    // both are present, we MUST remove content length. This is
                    // https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.3
                    if contains_content_length && contains_transfer_encoding {
                        response_header.remove_header(&header::CONTENT_LENGTH);
                    }

                    self.buf = buf;
                    self.response_header = Some(response_header);
                    self.validate_response()?;
                    // convert to upgrade body type
                    // https://datatracker.ietf.org/doc/html/rfc9110#status.101
                    // as an "informational" header, this cannot have a body
                    self.upgraded = self
                        .is_upgrade(self.response_header.as_deref().expect("init above"))
                        .unwrap_or(false);
                    // init body reader if upgrade status has changed body mode
                    // (read_response_task will immediately try to init body afterwards anyways)
                    // informational headers will automatically avoid initializing body reader
                    self.init_body_reader();
                    // note that the (request) body writer is converted to close delimit
                    // when the upgraded body tasks are received
                    return Ok(s);
                }
                HeaderParseState::Partial => { /* continue the loop */ }
                HeaderParseState::Invalid(e) => {
                    return Error::e_because(
                        InvalidHTTPHeader,
                        format!("buf: {}", buf.escape_ascii()),
                        e,
                    );
                }
            }
        }
    }

    /// Similar to [`Self::read_response()`], read the response header and then return a copy of it.
    pub async fn read_resp_header_parts(&mut self) -> Result<Box<ResponseHeader>> {
        self.read_response().await?;
        // safe to unwrap because it is just read
        Ok(Box::new(self.resp_header().unwrap().clone()))
    }

    /// Return a reference of the [`ResponseHeader`] if the response is read
    pub fn resp_header(&self) -> Option<&ResponseHeader> {
        self.response_header.as_deref()
    }

    /// Get the header value for the given header name from the response header
    /// If there are multiple headers under the same name, the first one will be returned
    /// Use `self.resp_header().header.get_all(name)` to get all the headers under the same name
    /// Always return `None` if the response is not read yet.
    pub fn get_header(&self, name: impl AsHeaderName) -> Option<&HeaderValue> {
        self.response_header
            .as_ref()
            .and_then(|h| h.headers.get(name))
    }

    /// Get the request header as raw bytes, `b""` when the header doesn't exist or response not read
    pub fn get_header_bytes(&self, name: impl AsHeaderName) -> &[u8] {
        self.get_header(name).map_or(b"", |v| v.as_bytes())
    }

    /// Return the status code of the response if read
    pub fn get_status(&self) -> Option<StatusCode> {
        self.response_header.as_ref().map(|h| h.status)
    }

    async fn do_read_body(&mut self) -> Result<Option<BufRef>> {
        self.init_body_reader();
        self.body_reader
            .read_body(&mut self.underlying_stream)
            .await
    }

    /// Read the response body into the internal buffer.
    /// Return `Ok(Some(ref)) after a successful read.
    /// Return `Ok(None)` if there is no more body to read.
    pub async fn read_body_ref(&mut self) -> Result<Option<&[u8]>> {
        let result = match self.read_timeout {
            Some(t) => match timeout(t, self.do_read_body()).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(ReadTimedout, format!("reading body, timeout: {t:?}")),
            },
            None => self.do_read_body().await,
        };

        result.map(|maybe_body| {
            maybe_body.map(|body_ref| {
                let slice = self.body_reader.get_body(&body_ref);
                self.body_recv = self.body_recv.saturating_add(slice.len());
                slice
            })
        })
    }

    /// Similar to [`Self::read_body_ref`] but return `Bytes` instead of a slice reference.
    pub async fn read_body_bytes(&mut self) -> Result<Option<Bytes>> {
        let read = self.read_body_ref().await?;
        Ok(read.map(Bytes::copy_from_slice))
    }

    /// Upstream response body bytes received (payload only; excludes headers/framing).
    pub fn body_bytes_received(&self) -> usize {
        self.body_recv
    }

    /// Whether there is no more body to read.
    pub fn is_body_done(&mut self) -> bool {
        self.init_body_reader();
        self.body_reader.body_done()
    }

    pub fn set_allow_h1_response_invalid_content_length(&mut self, allow: bool) {
        self.allow_h1_response_invalid_content_length = allow;
    }

    pub(super) fn get_headers_raw(&self) -> &[u8] {
        // TODO: these get_*() could panic. handle them better
        self.raw_header.as_ref().unwrap().get(&self.buf[..])
    }

    /// Get the raw response header bytes
    pub fn get_headers_raw_bytes(&self) -> Bytes {
        self.raw_header.as_ref().unwrap().get_bytes(&self.buf)
    }

    fn set_keepalive(&mut self, seconds: Option<u64>) {
        match seconds {
            Some(sec) => {
                if sec > 0 {
                    self.keepalive_timeout = KeepaliveStatus::Timeout(Duration::from_secs(sec));
                } else {
                    self.keepalive_timeout = KeepaliveStatus::Infinite;
                }
            }
            None => {
                self.keepalive_timeout = KeepaliveStatus::Off;
            }
        }
    }

    /// Apply keepalive settings according to the server's response
    /// For HTTP 1.1, assume keepalive as long as there is no `Connection: Close` request header.
    /// For HTTP 1.0, only keepalive if there is an explicit header `Connection: keep-alive`.
    pub fn respect_keepalive(&mut self) {
        if self.upgraded || self.get_status() == Some(StatusCode::SWITCHING_PROTOCOLS) {
            // make sure the connection is closed at the end when 101/upgrade is used
            self.set_keepalive(None);
            return;
        }
        if self.body_reader.need_init() || self.close_delimited_resp {
            // Defense-in-depth: response body close-delimited (or no body interpretation
            // upon reuse check)
            // explicitly disable reuse
            self.set_keepalive(None);
            return;
        }
        if self.body_reader.has_bytes_overread() {
            // if more bytes sent than expected, there are likely more bytes coming
            // so don't reuse this connection
            self.set_keepalive(None);
            return;
        }

        // Per [RFC 9112 Section 6.1-16](https://datatracker.ietf.org/doc/html/rfc9112#section-6.1-16),
        // if Transfer-Encoding is received in HTTP/1.0 response, connection MUST be closed after processing.
        if self.resp_header().map(|h| h.version) == Some(Version::HTTP_10)
            && self
                .resp_header()
                .and_then(|h| h.headers.get(header::TRANSFER_ENCODING))
                .is_some()
        {
            self.set_keepalive(None);
            return;
        }
        if let Some(keepalive) = self.is_connection_keepalive() {
            if keepalive {
                let (timeout, _max_use) = self.get_keepalive_values();
                // TODO: respect max_use
                match timeout {
                    Some(d) => self.set_keepalive(Some(d)),
                    None => self.set_keepalive(Some(0)), // infinite
                }
            } else {
                self.set_keepalive(None);
            }
        } else if self.resp_header().map(|h| h.version) == Some(Version::HTTP_11) {
            self.set_keepalive(Some(0)); // on by default for http 1.1
        } else {
            self.set_keepalive(None); // off by default for http 1.0
        }
    }

    // Whether this session will be kept alive
    pub fn will_keepalive(&self) -> bool {
        !matches!(self.keepalive_timeout, KeepaliveStatus::Off)
    }

    fn is_connection_keepalive(&self) -> Option<bool> {
        let request_keepalive = self
            .request_written
            .as_ref()
            .and_then(|req| is_buf_keepalive(req.headers.get(header::CONNECTION)));

        match request_keepalive {
            // ignore what the server sends if request disables keepalive explicitly
            Some(false) => Some(false),
            _ => is_buf_keepalive(self.get_header(header::CONNECTION)),
        }
    }

    /// `Keep-Alive: timeout=5, max=1000` => 5, 1000
    /// This is defined in the below spec, this not part of any RFC, so
    /// it's behavior is different on different platforms.
    /// https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Keep-Alive
    fn get_keepalive_values(&self) -> (Option<u64>, Option<usize>) {
        let Some(keep_alive_header) = self.get_header("Keep-Alive") else {
            return (None, None);
        };

        let Ok(header_value) = str::from_utf8(keep_alive_header.as_bytes()) else {
            return (None, None);
        };

        let mut timeout = None;
        let mut max = None;

        for param in header_value.split(',') {
            let parts = param.split_once('=').map(|(k, v)| (k.trim(), v));
            match parts {
                Some(("timeout", timeout_value)) => timeout = timeout_value.trim().parse().ok(),
                Some(("max", max_value)) => max = max_value.trim().parse().ok(),
                _ => {}
            }
        }

        (timeout, max)
    }

    /// Close the connection abruptly. This allows to signal the server that the connection is closed
    /// before dropping [`HttpSession`]
    pub async fn shutdown(&mut self) {
        let _ = self.underlying_stream.shutdown().await;
    }

    /// Consume `self`, if the connection can be reused, the underlying stream will be returned.
    /// The returned connection can be kept in a connection pool so that next time the same
    /// server is being contacted. A new client session can be created via [`Self::new()`].
    /// If the connection cannot be reused, the underlying stream will be closed and `None` will be
    /// returned.
    pub async fn reuse(mut self) -> Option<Stream> {
        // TODO: this function is unnecessarily slow for keepalive case
        // because that case does not need async
        match self.keepalive_timeout {
            KeepaliveStatus::Off => {
                debug!("HTTP shutdown connection");
                self.shutdown().await;
                None
            }
            _ => Some(self.underlying_stream),
        }
    }

    fn init_body_reader(&mut self) {
        if self.body_reader.need_init() {
            // follow https://datatracker.ietf.org/doc/html/rfc9112#section-6.3
            let preread_body = self.preread_body.as_ref().unwrap().get(&self.buf[..]);

            if let Some(req) = self.request_written.as_ref() {
                if req.method == http::method::Method::HEAD {
                    self.body_reader.init_content_length(0, preread_body);
                    return;
                }
            }

            let upgraded = if let Some(code) = self.get_status() {
                match code.as_u16() {
                    101 => self.is_upgrade_req(),
                    100..=199 => {
                        // informational headers, not enough to init body reader
                        return;
                    }
                    204 | 304 => {
                        // no body by definition
                        self.body_reader.init_content_length(0, preread_body);
                        return;
                    }
                    _ => false,
                }
            } else {
                false
            };

            if upgraded {
                self.body_reader.init_close_delimited(preread_body);
                self.close_delimited_resp = true;
            } else if self.is_chunked_encoding() {
                // if chunked encoding, content-length should be ignored
                self.body_reader.init_chunked(preread_body);
            } else if let Some(cl) = self.get_content_length().unwrap_or(None) {
                self.body_reader.init_content_length(cl, preread_body);
            } else {
                self.body_reader.init_close_delimited(preread_body);
                self.close_delimited_resp = true;
            }
        }
    }

    /// Whether this request is for upgrade
    pub fn is_upgrade_req(&self) -> bool {
        match self.request_written.as_deref() {
            Some(req) => is_upgrade_req(req),
            None => false,
        }
    }

    /// `Some(true)` if the this is a successful upgrade
    /// `Some(false)` if the request is an upgrade but the response refuses it
    /// `None` if the request is not an upgrade.
    fn is_upgrade(&self, header: &ResponseHeader) -> Option<bool> {
        if self.is_upgrade_req() {
            Some(is_upgrade_resp(header))
        } else {
            None
        }
    }

    /// Was this request successfully turned into an upgraded connection?
    ///
    /// Both the request had to have been an `Upgrade` request
    /// and the response had to have been a `101 Switching Protocols`.
    pub fn was_upgraded(&self) -> bool {
        self.upgraded
    }

    /// If upgraded but not yet converted, then body writer will be
    /// converted to http1.0 mode (pass through bytes as-is).
    pub fn maybe_upgrade_body_writer(&mut self) {
        if self.was_upgraded() {
            self.received_upgrade_req_body = true;
            self.body_writer.convert_to_close_delimited();
        }
    }

    fn get_content_length(&self) -> Result<Option<usize>> {
        buf_to_content_length(
            self.get_header(header::CONTENT_LENGTH)
                .map(|v| v.as_bytes()),
        )
    }

    fn is_chunked_encoding(&self) -> bool {
        self.resp_header()
            .map(|h| is_chunked_encoding_from_headers(&h.headers))
            .unwrap_or(false)
    }

    fn init_req_body_writer(&mut self, header: &RequestHeader) {
        self.init_body_writer_comm(&header.headers)
    }

    fn init_body_writer_comm(&mut self, headers: &HMap) {
        if is_chunked_encoding_from_headers(headers) {
            // transfer-encoding takes priority over content-length
            self.body_writer.init_chunked();
        } else {
            let content_length =
                header_value_content_length(headers.get(http::header::CONTENT_LENGTH));
            match content_length {
                Some(length) => {
                    self.body_writer.init_content_length(length);
                }
                None => {
                    // Per RFC 9112: "Request messages are never close-delimited because they are
                    // always explicitly framed by length or transfer coding, with the absence of
                    // both implying the request ends immediately after the header section."
                    // Requests without Content-Length or Transfer-Encoding have 0 body
                    self.body_writer.init_content_length(0);
                }
            }
        }
    }

    // should (continue to) try to read response header or start reading response body
    fn should_read_resp_header(&self) -> bool {
        match self.get_status().map(|s| s.as_u16()) {
            Some(101) => false,      // switching protocol successful, no more header to read
            Some(100..=199) => true, // only informational header read
            Some(_) => false,
            None => true, // no response code, no header read yet
        }
    }

    pub async fn read_response_task(&mut self) -> Result<HttpTask> {
        if self.should_read_resp_header() {
            let resp_header = self.read_resp_header_parts().await?;
            let end_of_body = self.is_body_done();
            debug!("Response header: {resp_header:?}");
            trace!(
                "Raw Response header: {:?}",
                str::from_utf8(self.get_headers_raw()).unwrap()
            );
            Ok(HttpTask::Header(resp_header, end_of_body))
        } else if self.is_body_done() {
            // no body
            debug!("Response is done");
            Ok(HttpTask::Done)
        } else {
            /* need to read body */
            let body = self.read_body_bytes().await?;
            let end_of_body = self.is_body_done();
            debug!(
                "Response body: {} bytes, end: {end_of_body}",
                body.as_ref().map_or(0, |b| b.len())
            );
            trace!("Response body: {body:?}, upgraded: {}", self.upgraded);
            if self.upgraded {
                Ok(HttpTask::UpgradedBody(body, end_of_body))
            } else {
                Ok(HttpTask::Body(body, end_of_body))
            }
        }
        // TODO: support h1 trailer
    }

    /// Return the [Digest] of the connection
    ///
    /// For reused connection, the timing in the digest will reflect its initial handshakes
    /// The caller should check if the connection is reused to avoid misuse the timing field.
    pub fn digest(&self) -> &Digest {
        &self.digest
    }

    /// Return a mutable [Digest] reference for the connection.
    pub fn digest_mut(&mut self) -> &mut Digest {
        &mut self.digest
    }

    /// Return the server (peer) address recorded in the connection digest.
    pub fn server_addr(&self) -> Option<&SocketAddr> {
        self.digest()
            .socket_digest
            .as_ref()
            .map(|d| d.peer_addr())?
    }

    /// Return the client (local) address recorded in the connection digest.
    pub fn client_addr(&self) -> Option<&SocketAddr> {
        self.digest()
            .socket_digest
            .as_ref()
            .map(|d| d.local_addr())?
    }

    /// Get the reference of the [Stream] that this HTTP session is operating upon.
    pub fn stream(&self) -> &Stream {
        &self.underlying_stream
    }

    /// Consume `self`, the underlying [Stream] will be returned and can be used
    /// directly, for example, in the case of HTTP upgrade. It is not flushed
    /// prior to being returned.
    pub fn into_inner(self) -> Stream {
        self.underlying_stream
    }
}

#[inline]
fn parse_resp_buffer<'buf>(
    resp: &mut httparse::Response<'_, 'buf>,
    buf: &'buf [u8],
) -> HeaderParseState {
    let mut parser = httparse::ParserConfig::default();
    parser.allow_spaces_after_header_name_in_responses(true);
    parser.allow_obsolete_multiline_headers_in_responses(true);
    let res = match parser.parse_response(resp, buf) {
        Ok(s) => s,
        Err(e) => {
            return HeaderParseState::Invalid(e);
        }
    };
    match res {
        httparse::Status::Complete(s) => HeaderParseState::Complete(s),
        _ => HeaderParseState::Partial,
    }
}

// TODO: change it to to_buf
#[inline]
pub fn http_req_header_to_wire(req: &RequestHeader) -> Option<BytesMut> {
    let mut buf = BytesMut::with_capacity(512);

    // Request-Line
    let method = req.method.as_str().as_bytes();
    buf.put_slice(method);
    buf.put_u8(b' ');
    buf.put_slice(req.raw_path());
    buf.put_u8(b' ');

    let version = match req.version {
        Version::HTTP_09 => "HTTP/0.9",
        Version::HTTP_10 => "HTTP/1.0",
        Version::HTTP_11 => "HTTP/1.1",
        Version::HTTP_2 => "HTTP/2",
        _ => {
            return None; /*TODO: unsupported version */
        }
    };
    buf.put_slice(version.as_bytes());
    buf.put_slice(CRLF);

    // headers
    req.header_to_h1_wire(&mut buf);
    buf.put_slice(CRLF);
    Some(buf)
}

impl UniqueID for HttpSession {
    fn id(&self) -> UniqueIDType {
        self.underlying_stream.id()
    }
}

#[cfg(test)]
mod tests_stream {
    use super::*;
    use crate::protocols::http::v1::body::{BodyMode, ParseState};
    use crate::upstreams::peer::PeerOptions;
    use crate::ErrorType;
    use rstest::rstest;
    use tokio_test::io::Builder;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[tokio::test]
    async fn read_basic_response() {
        init_log();
        let input = b"HTTP/1.1 200 OK\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input.len(), res.unwrap());
        assert_eq!(0, http_stream.resp_header().unwrap().headers.len());
    }

    #[tokio::test]
    async fn read_response_custom_reason() {
        init_log();
        let input = b"HTTP/1.1 200 Just Fine\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input.len(), res.unwrap());
        assert_eq!(
            http_stream.resp_header().unwrap().get_reason_phrase(),
            Some("Just Fine")
        );
    }

    #[tokio::test]
    async fn read_response_default() {
        init_log();
        let input_header = b"HTTP/1.1 200 OK\r\n\r\n";
        let input_body = b"abc";
        let input_close = b""; // simulating close
        let mock_io = Builder::new()
            .read(&input_header[..])
            .read(&input_body[..])
            .read(&input_close[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input_header.len(), res.unwrap());
        let res = http_stream.read_body_ref().await.unwrap();
        assert_eq!(res.unwrap(), input_body);
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::UntilClose(3)
        );
        let res = http_stream.read_body_ref().await.unwrap();
        assert_eq!(res, None);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));
    }

    #[tokio::test]
    async fn body_bytes_received_content_length() {
        init_log();
        let input_header = b"HTTP/1.1 200 OK\r\nContent-Length: 3\r\n\r\n";
        let input_body = b"abc";
        let input_close = b""; // simulating close
        let mock_io = Builder::new()
            .read(&input_header[..])
            .read(&input_body[..])
            .read(&input_close[..])
            .build();
        let mut http = HttpSession::new(Box::new(mock_io));
        http.read_response().await.unwrap();
        let _ = http.read_body_ref().await.unwrap();
        let _ = http.read_body_ref().await.unwrap();
        assert_eq!(http.body_bytes_received(), 3);
    }

    #[tokio::test]
    async fn body_bytes_received_chunked() {
        init_log();
        let input_header = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n";
        let input_body = b"3\r\nabc\r\n0\r\n\r\n";
        let mock_io = Builder::new()
            .read(&input_header[..])
            .read(&input_body[..])
            .build();
        let mut http = HttpSession::new(Box::new(mock_io));
        http.read_response().await.unwrap();
        // first read returns the payload chunk
        let first = http.read_body_ref().await.unwrap();
        assert_eq!(first.unwrap(), b"abc");
        // next read consumes terminating chunk
        let _ = http.read_body_ref().await.unwrap();
        assert_eq!(http.body_bytes_received(), 3);
    }

    #[tokio::test]
    async fn h1_body_bytes_received_http10_until_close() {
        init_log();
        let header = b"HTTP/1.1 200 OK\r\n\r\n";
        let body = b"abc";
        let close = b"";
        let mock = Builder::new()
            .read(&header[..])
            .read(&body[..])
            .read(&close[..])
            .build();
        let mut http = HttpSession::new(Box::new(mock));
        http.read_response().await.unwrap();
        let _ = http.read_body_ref().await.unwrap();
        let _ = http.read_body_ref().await.unwrap();
        assert_eq!(http.body_bytes_received(), 3);
    }

    #[tokio::test]
    async fn h1_body_bytes_received_chunked_multi() {
        init_log();
        let header = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n";
        let body = b"1\r\na\r\n2\r\nbc\r\n0\r\n\r\n"; // payload abc
        let mock = Builder::new().read(&header[..]).read(&body[..]).build();
        let mut http = HttpSession::new(Box::new(mock));
        http.read_response().await.unwrap();
        // first chunk
        let s1 = http.read_body_ref().await.unwrap().unwrap();
        assert_eq!(s1, b"a");
        // second chunk
        let s2 = http.read_body_ref().await.unwrap().unwrap();
        assert_eq!(s2, b"bc");
        // end
        let _ = http.read_body_ref().await.unwrap();
        assert_eq!(http.body_bytes_received(), 3);
    }

    #[tokio::test]
    async fn h1_body_bytes_received_preread_in_header_buf() {
        init_log();
        // header and a small body arrive together
        let combined = b"HTTP/1.1 200 OK\r\n\r\nabc";
        let close = b"";
        let mock = Builder::new().read(&combined[..]).read(&close[..]).build();
        let mut http = HttpSession::new(Box::new(mock));
        http.read_response().await.unwrap();
        // first body read should return the preread bytes
        let s = http.read_body_ref().await.unwrap().unwrap();
        assert_eq!(s, b"abc");
        // then EOF
        let _ = http.read_body_ref().await.unwrap();
        assert_eq!(http.body_bytes_received(), 3);
    }

    #[tokio::test]
    async fn h1_body_bytes_received_overread_content_length() {
        init_log();
        let header1 = b"HTTP/1.1 200 OK\r\n";
        let header2 = b"Content-Length: 2\r\n\r\n";
        let body = b"abc"; // one extra byte beyond CL
        let mock = Builder::new()
            .read(&header1[..])
            .read(&header2[..])
            .read(&body[..])
            .build();
        let mut http = HttpSession::new(Box::new(mock));
        http.read_response().await.unwrap();
        let s = http.read_body_ref().await.unwrap().unwrap();
        assert_eq!(s, b"ab");
        // then end
        let _ = http.read_body_ref().await.unwrap();
        assert_eq!(http.body_bytes_received(), 2);
    }

    #[tokio::test]
    async fn h1_body_bytes_received_after_100_continue() {
        init_log();
        let info = b"HTTP/1.1 100 Continue\r\n\r\n";
        let header = b"HTTP/1.1 200 OK\r\nContent-Length: 1\r\n\r\n";
        let body = b"x";
        let mock = Builder::new()
            .read(&info[..])
            .read(&header[..])
            .read(&body[..])
            .build();
        let mut http = HttpSession::new(Box::new(mock));
        // read informational
        match http.read_response_task().await.unwrap() {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 100);
                assert!(!eob);
            }
            _ => panic!("expected informational header"),
        }
        // read final header
        match http.read_response_task().await.unwrap() {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 200);
                assert!(!eob);
            }
            _ => panic!("expected final header"),
        }
        // read body
        let s = http.read_body_ref().await.unwrap().unwrap();
        assert_eq!(s, b"x");
        let _ = http.read_body_ref().await.unwrap();
        assert_eq!(http.body_bytes_received(), 1);
    }

    #[tokio::test]
    async fn read_response_overread() {
        init_log();
        let input_header = b"HTTP/1.1 200 OK\r\n";
        let input_header2 = b"Content-Length: 2\r\n\r\n";
        let input_body = b"abc";
        let mock_io = Builder::new()
            .read(&input_header[..])
            .read(&input_header2[..])
            .read(&input_body[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input_header.len() + input_header2.len(), res.unwrap());
        let res = http_stream.read_body_ref().await.unwrap();
        assert_eq!(res.unwrap(), &input_body[..2]);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(2));
        let res = http_stream.read_body_ref().await.unwrap();
        assert_eq!(res, None);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(2));
        http_stream.respect_keepalive();
        assert!(!http_stream.will_keepalive());
    }

    #[tokio::test]
    async fn read_resp_header_with_space() {
        init_log();
        let input = b"HTTP/1.1 200 OK\r\nServer : pingora\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input.len(), res.unwrap());
        assert_eq!(1, http_stream.resp_header().unwrap().headers.len());
        assert_eq!(http_stream.get_header("Server").unwrap(), "pingora");
    }

    #[cfg(feature = "patched_http1")]
    #[tokio::test]
    async fn read_resp_header_with_utf8() {
        init_log();
        let input = "HTTP/1.1 200 OK\r\nServer👍: pingora\r\n\r\n".as_bytes();
        let mock_io = Builder::new().read(input).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let resp = http_stream.read_resp_header_parts().await.unwrap();
        assert_eq!(1, http_stream.resp_header().unwrap().headers.len());
        assert_eq!(http_stream.get_header("Server👍").unwrap(), "pingora");
        assert_eq!(resp.headers.get("Server👍").unwrap(), "pingora");
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to read.")]
    async fn read_timeout() {
        init_log();
        let input = b"HTTP/1.1 200 OK\r\n\r\n";
        let mock_io = Builder::new()
            .wait(Duration::from_secs(2))
            .read(&input[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_timeout = Some(Duration::from_secs(1));
        let res = http_stream.read_response().await;
        assert_eq!(res.unwrap_err().etype(), &ErrorType::ReadTimedout);
    }

    #[tokio::test]
    async fn read_2_buf() {
        init_log();
        let input1 = b"HTTP/1.1 200 OK\r\n";
        let input2 = b"Server: pingora\r\n\r\n";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input1.len() + input2.len(), res.unwrap());
        assert_eq!(
            input1.len() + input2.len(),
            http_stream.get_headers_raw().len()
        );
        assert_eq!(1, http_stream.resp_header().unwrap().headers.len());
        assert_eq!(http_stream.get_header("Server").unwrap(), "pingora");

        assert_eq!(Some(StatusCode::OK), http_stream.get_status());
        assert_eq!(Version::HTTP_11, http_stream.resp_header().unwrap().version);
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to read.")]
    async fn read_invalid() {
        let input1 = b"HTP/1.1 200 OK\r\n";
        let input2 = b"Server: pingora\r\n\r\n";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(&ErrorType::InvalidHTTPHeader, res.unwrap_err().etype());
    }

    #[tokio::test]
    async fn write() {
        let wire = b"GET /test HTTP/1.1\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().write(wire).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let mut new_request = RequestHeader::build("GET", b"/test", None).unwrap();
        new_request.insert_header("Foo", "Bar").unwrap();
        let n = http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();
        assert_eq!(wire.len(), n);
    }

    #[rstest]
    #[case::negative("-1")]
    #[case::not_a_number("abc")]
    #[case::float("1.5")]
    #[case::empty("")]
    #[case::spaces("  ")]
    #[case::mixed("123abc")]
    #[tokio::test]
    async fn validate_response_rejects_invalid_content_length(#[case] invalid_value: &str) {
        init_log();
        let input = format!(
            "HTTP/1.1 200 OK\r\nServer: test\r\nContent-Length: {}\r\n\r\n",
            invalid_value
        );
        let mock_io = Builder::new().read(input.as_bytes()).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        // read_response calls validate_response internally, so it should fail here
        let res = http_stream.read_response().await;
        assert!(res.is_err());
        assert_eq!(res.unwrap_err().etype(), &ErrorType::InvalidHTTPHeader);
    }

    #[tokio::test]
    async fn allow_invalid_content_length_close_delimited_when_configured() {
        init_log();
        let input_header = b"HTTP/1.1 200 OK\r\nServer: test\r\nContent-Length: abc\r\n\r\n";
        let input_body = b"abc";
        let input_close = b"";
        let mock_io = Builder::new()
            .read(&input_header[..])
            .read(&input_body[..])
            .read(&input_close[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let mut peer_options = PeerOptions::new();
        peer_options.allow_h1_response_invalid_content_length = true;
        http_stream.set_allow_h1_response_invalid_content_length(
            peer_options.allow_h1_response_invalid_content_length,
        );

        let res = http_stream.read_response().await;
        assert!(res.is_ok());
        let body = http_stream.read_body_ref().await.unwrap().unwrap();
        assert_eq!(body, input_body);
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::UntilClose(3)
        );
        let body = http_stream.read_body_ref().await.unwrap();
        assert!(body.is_none());
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));
    }

    #[rstest]
    #[case::valid_zero("0")]
    #[case::valid_small("123")]
    #[case::valid_large("999999")]
    #[tokio::test]
    async fn validate_response_accepts_valid_content_length(#[case] valid_value: &str) {
        init_log();
        let input = format!(
            "HTTP/1.1 200 OK\r\nServer: test\r\nContent-Length: {}\r\n\r\n",
            valid_value
        );
        let mock_io = Builder::new().read(input.as_bytes()).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert!(res.is_ok());
    }

    #[tokio::test]
    async fn validate_response_accepts_no_content_length() {
        init_log();
        let input = b"HTTP/1.1 200 OK\r\nServer: test\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert!(res.is_ok());
    }

    #[rstest]
    #[case(None, None, None)]
    #[case(Some("transfer-encoding"), None, None)]
    #[case(Some("transfer-encoding"), Some("CONTENT-LENGTH"), Some("4"))]
    #[case(Some("TRANSFER-ENCODING"), Some("CONTENT-LENGTH"), Some("4"))]
    #[case(Some("TRANSFER-ENCODING"), None, None)]
    #[case(None, Some("CONTENT-LENGTH"), Some("4"))]
    #[case(Some("TRANSFER-ENCODING"), Some("content-length"), Some("4"))]
    #[case(None, Some("content-length"), Some("4"))]
    #[case(Some("TRANSFER-ENCODING"), Some("CONTENT-LENGTH"), Some("abc"))]
    #[tokio::test]
    async fn response_transfer_encoding_and_content_length_handling(
        #[case] transfer_encoding_header: Option<&str>,
        #[case] content_length_header: Option<&str>,
        #[case] content_length_value: Option<&str>,
    ) {
        init_log();
        let input1 = b"HTTP/1.1 200 OK\r\n";
        let mut input2 = "Server: test\r\n".to_owned();

        if let Some(transfer_encoding) = transfer_encoding_header {
            input2 += &format!("{transfer_encoding}: chunked\r\n");
        }
        if let Some(content_length) = content_length_header {
            let value = content_length_value.unwrap_or("4");
            input2 += &format!("{content_length}: {value}\r\n")
        }

        input2 += "\r\n";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(input2.as_bytes())
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let _ = http_stream.read_response().await.unwrap();

        match (content_length_header, transfer_encoding_header) {
            (Some(_) | None, Some(_)) => {
                assert!(http_stream.get_header(header::TRANSFER_ENCODING).is_some());
                assert!(http_stream.get_header(header::CONTENT_LENGTH).is_none());
            }
            (Some(_), None) => {
                assert!(http_stream.get_header(header::TRANSFER_ENCODING).is_none());
                assert!(http_stream.get_header(header::CONTENT_LENGTH).is_some());
            }
            _ => {
                assert!(http_stream.get_header(header::CONTENT_LENGTH).is_none());
                assert!(http_stream.get_header(header::TRANSFER_ENCODING).is_none());
            }
        }
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to write.")]
    async fn write_timeout() {
        let wire = b"GET /test HTTP/1.1\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new()
            .wait(Duration::from_secs(2))
            .write(wire)
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.write_timeout = Some(Duration::from_secs(1));
        let mut new_request = RequestHeader::build("GET", b"/test", None).unwrap();
        new_request.insert_header("Foo", "Bar").unwrap();
        let res = http_stream
            .write_request_header(Box::new(new_request))
            .await;
        assert_eq!(res.unwrap_err().etype(), &ErrorType::WriteTimedout);
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to write.")]
    async fn write_body_timeout() {
        // Test needs Content-Length header to actually attempt to write body
        let header = b"POST /test HTTP/1.1\r\nContent-Length: 3\r\n\r\n";
        let body = b"abc";
        let mock_io = Builder::new()
            .write(&header[..])
            .wait(Duration::from_secs(2))
            .write(&body[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.write_timeout = Some(Duration::from_secs(1));

        let mut new_request = RequestHeader::build("POST", b"/test", None).unwrap();
        new_request.insert_header("Content-Length", "3").unwrap();
        http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();
        let res = http_stream.write_body(body).await;
        assert_eq!(res.unwrap_err().etype(), &WriteTimedout);
    }

    #[cfg(feature = "patched_http1")]
    #[tokio::test]
    async fn write_invalid_path() {
        let wire = b"GET /\x01\xF0\x90\x80 HTTP/1.1\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().write(wire).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let mut new_request = RequestHeader::build("GET", b"/\x01\xF0\x90\x80", None).unwrap();
        new_request.insert_header("Foo", "Bar").unwrap();
        let n = http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();
        assert_eq!(wire.len(), n);
    }

    #[tokio::test]
    async fn read_informational() {
        init_log();
        let input1 = b"HTTP/1.1 100 Continue\r\n\r\n";
        let input2 = b"HTTP/1.1 204 OK\r\nServer: pingora\r\n\r\n";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));

        // read 100 header first
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 100);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
        // read 200 header next
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 204);
                assert!(eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
    }

    #[tokio::test]
    async fn read_informational_combined_with_final() {
        init_log();
        let input = b"HTTP/1.1 100 Continue\r\n\r\nHTTP/1.1 200 OK\r\nServer: pingora\r\nContent-Length: 3\r\n\r\n";
        let body = b"abc";
        let mock_io = Builder::new().read(&input[..]).read(&body[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));

        // read 100 header first
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 100);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
        // read 200 header next
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 200);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
        // read body next
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Body(b, eob) => {
                assert_eq!(b.unwrap(), &body[..]);
                assert!(eob);
            }
            _ => {
                panic!("task {task:?} should be body")
            }
        }
    }

    #[tokio::test]
    async fn read_informational_multiple_combined_with_final() {
        init_log();
        let input = b"HTTP/1.1 100 Continue\r\n\r\nHTTP/1.1 103 Early Hints\r\n\r\nHTTP/1.1 204 No Content\r\nServer: pingora\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));

        // read 100 header first
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 100);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }

        // then read 103 header
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 103);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }

        // finally read 200 header
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 204);
                assert!(eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
    }

    #[tokio::test]
    async fn read_informational_then_keepalive_response() {
        init_log();
        // Test that after reading an informational response (100 Continue),
        // keepalive still works properly
        let wire = b"GET / HTTP/1.1\r\n\r\n";
        let input1 = b"HTTP/1.1 100 Continue\r\n\r\n";
        let input2 = b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\n"; // Proper Content-Length
        let body = b"response body";

        let mock_io = Builder::new()
            .write(&wire[..])
            .read(&input1[..])
            .read(&input2[..])
            .read(&body[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));

        // Write request
        let new_request = RequestHeader::build("GET", b"/", None).unwrap();
        http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();

        // Read 100 Continue
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 100);
                assert!(!eob);
            }
            _ => {
                panic!("task should be informational header")
            }
        }

        // Read final 200 OK header
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 200);
                assert!(!eob); // Should not be end of body yet
            }
            _ => {
                panic!("task should be final header")
            }
        }

        // Read body
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Body(b, eob) => {
                assert_eq!(b.unwrap(), &body[..]);
                assert!(eob); // EOF - body is complete
            }
            _ => {
                panic!("task {task:?} should be body")
            }
        }

        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(13));

        // Keepalive should be enabled for properly-framed HTTP/1.1
        http_stream.respect_keepalive();
        assert!(http_stream.will_keepalive());
    }

    #[tokio::test]
    async fn init_body_for_upgraded_req() {
        let wire =
            b"GET / HTTP/1.1\r\nConnection: Upgrade\r\nUpgrade: WS\r\nContent-Length: 0\r\n\r\n";
        let input1 = b"HTTP/1.1 101 Switching Protocols\r\n\r\n";
        let input2 = b"PAYLOAD";
        let ws_data = b"data";

        let mock_io = Builder::new()
            .write(wire)
            .read(&input1[..])
            .write(&ws_data[..])
            .read(&input2[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let mut new_request = RequestHeader::build("GET", b"/", None).unwrap();
        new_request.insert_header("Connection", "Upgrade").unwrap();
        new_request.insert_header("Upgrade", "WS").unwrap();
        new_request.insert_header("Content-Length", "0").unwrap();
        let _ = http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(0, 0)
        );
        assert!(http_stream.body_writer.finished());

        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 101);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
        // changed body mode
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::UntilClose(0)
        );
        // request writer will be explicitly initialized in a separate call
        assert!(http_stream.body_writer.finished());
        http_stream.maybe_upgrade_body_writer();

        assert!(!http_stream.body_writer.finished());
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(0));

        http_stream.write_body(&ws_data[..]).await.unwrap();
        // read WS
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::UpgradedBody(b, eob) => {
                assert_eq!(b.unwrap(), &input2[..]);
                assert!(!eob);
            }
            _ => {
                panic!("task should be upgraded body")
            }
        }
    }

    #[tokio::test]
    async fn init_preread_body_for_upgraded_req() {
        let wire =
            b"GET / HTTP/1.1\r\nConnection: Upgrade\r\nUpgrade: WS\r\nContent-Length: 0\r\n\r\n";
        let input = b"HTTP/1.1 101 Switching Protocols\r\n\r\nPAYLOAD";
        let ws_data = b"data";

        let mock_io = Builder::new()
            .write(wire)
            .read(&input[..])
            .write(&ws_data[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let mut new_request = RequestHeader::build("GET", b"/", None).unwrap();
        new_request.insert_header("Connection", "Upgrade").unwrap();
        new_request.insert_header("Upgrade", "WS").unwrap();
        new_request.insert_header("Content-Length", "0").unwrap();
        let _ = http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(0, 0)
        );
        assert!(http_stream.body_writer.finished());

        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 101);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
        // changed body mode
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::UntilClose(0)
        );
        // request writer will be explicitly initialized in a separate call
        assert!(http_stream.body_writer.finished());
        http_stream.maybe_upgrade_body_writer();

        assert!(!http_stream.body_writer.finished());
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(0));

        http_stream.write_body(&ws_data[..]).await.unwrap();
        // read WS
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::UpgradedBody(b, eob) => {
                assert_eq!(b.unwrap(), &b"PAYLOAD"[..]);
                assert!(!eob);
            }
            _ => {
                panic!("task should be upgraded body")
            }
        }
    }

    #[tokio::test]
    async fn read_body_eos_after_upgrade() {
        let wire =
            b"GET / HTTP/1.1\r\nConnection: Upgrade\r\nUpgrade: WS\r\nContent-Length: 10\r\n\r\n";
        let input1 = b"HTTP/1.1 101 Switching Protocols\r\n\r\n";
        let input2 = b"PAYLOAD";
        let body_data = b"0123456789";
        let ws_data = b"data";

        let mock_io = Builder::new()
            .write(wire)
            .read(&input1[..])
            .write(&body_data[..])
            .read(&input2[..])
            .write(&ws_data[..])
            .build();

        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let mut new_request = RequestHeader::build("GET", b"/", None).unwrap();
        new_request.insert_header("Connection", "Upgrade").unwrap();
        new_request.insert_header("Upgrade", "WS").unwrap();
        new_request.insert_header("Content-Length", "10").unwrap();
        let _ = http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(10, 0)
        );
        assert!(!http_stream.body_writer.finished());

        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 101);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
        // changed body mode
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::UntilClose(0)
        );

        // write regular request payload
        http_stream.write_body(&body_data[..]).await.unwrap();
        http_stream.finish_body().await.unwrap();

        // we should still be able to read more response body
        // read WS
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::UpgradedBody(b, eob) => {
                assert_eq!(b.unwrap(), &input2[..]);
                assert!(!eob);
            }
            t => {
                panic!("task {t:?} should be upgraded body")
            }
        }

        // body IS finished, prior to upgrade on the downstream side
        assert!(http_stream.body_writer.finished());
        http_stream.maybe_upgrade_body_writer();

        assert!(!http_stream.body_writer.finished());
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(0));

        http_stream.write_body(&ws_data[..]).await.unwrap();
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(4));
        http_stream.finish_body().await.unwrap();
    }

    #[tokio::test]
    async fn read_switching_protocol() {
        init_log();

        let wire =
            b"GET / HTTP/1.1\r\nConnection: Upgrade\r\nUpgrade: WS\r\nContent-Length: 0\r\n\r\n";
        let input1 = b"HTTP/1.1 101 Continue\r\n\r\n";
        let input2 = b"PAYLOAD";

        let mock_io = Builder::new()
            .write(&wire[..])
            .read(&input1[..])
            .read(&input2[..])
            .build();

        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let mut new_request = RequestHeader::build("GET", b"/", None).unwrap();
        new_request.insert_header("Connection", "Upgrade").unwrap();
        new_request.insert_header("Upgrade", "WS").unwrap();
        new_request.insert_header("Content-Length", "0").unwrap();
        let _ = http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(0, 0)
        );
        assert!(http_stream.body_writer.finished());

        // read 100 header first
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::Header(h, eob) => {
                assert_eq!(h.status, 101);
                assert!(!eob);
            }
            _ => {
                panic!("task should be header")
            }
        }
        // read body
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::UpgradedBody(b, eob) => {
                assert_eq!(b.unwrap(), &input2[..]);
                assert!(!eob);
            }
            _ => {
                panic!("task should be upgraded body")
            }
        }
        // read body
        let task = http_stream.read_response_task().await.unwrap();
        match task {
            HttpTask::UpgradedBody(b, eob) => {
                assert!(b.is_none());
                assert!(eob);
            }
            _ => {
                panic!("task should be body with end of stream")
            }
        }
    }

    // Note: in debug mode, due to from_maybe_shared_unchecked() still tries to validate headers
    // values, so the code has to replace CRLF with whitespaces. In release mode, the CRLF is
    // reserved
    #[tokio::test]
    async fn read_obsolete_multiline_headers() {
        init_log();
        let input = b"HTTP/1.1 200 OK\r\nServer : pingora\r\n Foo: Bar\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input.len(), res.unwrap());

        assert_eq!(1, http_stream.resp_header().unwrap().headers.len());
        assert_eq!(
            http_stream.get_header("Server").unwrap(),
            "pingora   Foo: Bar"
        );

        let input = b"HTTP/1.1 200 OK\r\nServer : pingora\r\n\t  Fizz: Buzz\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input.len(), res.unwrap());
        assert_eq!(1, http_stream.resp_header().unwrap().headers.len());
        assert_eq!(
            http_stream.get_header("Server").unwrap(),
            "pingora  \t  Fizz: Buzz"
        );
    }

    #[cfg(feature = "patched_http1")]
    #[tokio::test]
    async fn read_headers_skip_invalid_line() {
        init_log();
        let input = b"HTTP/1.1 200 OK\r\n;\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_response().await;
        assert_eq!(input.len(), res.unwrap());
        assert_eq!(1, http_stream.resp_header().unwrap().headers.len());
        assert_eq!(http_stream.get_header("Foo").unwrap(), "Bar");
    }

    #[tokio::test]
    async fn read_keepalive_headers() {
        init_log();

        async fn build_resp_with_keepalive(conn: &str) -> HttpSession {
            // Include Content-Length to avoid triggering defense-in-depth close-delimited check
            let input =
                format!("HTTP/1.1 200 OK\r\nConnection: {conn}\r\nContent-Length: 0\r\n\r\n");
            let mock_io = Builder::new().read(input.as_bytes()).build();
            let mut http_stream = HttpSession::new(Box::new(mock_io));
            let res = http_stream.read_response().await;
            assert_eq!(input.len(), res.unwrap());
            http_stream.respect_keepalive();
            http_stream
        }

        assert_eq!(
            build_resp_with_keepalive("close").await.keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("keep-alive")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Infinite
        );

        assert_eq!(
            build_resp_with_keepalive("foo").await.keepalive_timeout,
            KeepaliveStatus::Infinite
        );

        assert_eq!(
            build_resp_with_keepalive("upgrade,close")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("upgrade, close")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("Upgrade, close")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("Upgrade,close")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("close,upgrade")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("close, upgrade")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("close,Upgrade")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        assert_eq!(
            build_resp_with_keepalive("close, Upgrade")
                .await
                .keepalive_timeout,
            KeepaliveStatus::Off
        );

        async fn build_resp_with_keepalive_values(keep_alive: &str) -> HttpSession {
            let input = format!("HTTP/1.1 200 OK\r\nKeep-Alive: {keep_alive}\r\n\r\n");
            let mock_io = Builder::new().read(input.as_bytes()).build();
            let mut http_stream = HttpSession::new(Box::new(mock_io));
            let res = http_stream.read_response().await;
            assert_eq!(input.len(), res.unwrap());
            http_stream.respect_keepalive();
            http_stream
        }

        assert_eq!(
            build_resp_with_keepalive_values("timeout=5, max=1000")
                .await
                .get_keepalive_values(),
            (Some(5), Some(1000))
        );

        assert_eq!(
            build_resp_with_keepalive_values("max=1000, timeout=5")
                .await
                .get_keepalive_values(),
            (Some(5), Some(1000))
        );

        assert_eq!(
            build_resp_with_keepalive_values(" timeout = 5, max = 1000 ")
                .await
                .get_keepalive_values(),
            (Some(5), Some(1000))
        );

        assert_eq!(
            build_resp_with_keepalive_values("timeout=5")
                .await
                .get_keepalive_values(),
            (Some(5), None)
        );

        assert_eq!(
            build_resp_with_keepalive_values("max=1000")
                .await
                .get_keepalive_values(),
            (None, Some(1000))
        );

        assert_eq!(
            build_resp_with_keepalive_values("a=b")
                .await
                .get_keepalive_values(),
            (None, None)
        );

        assert_eq!(
            build_resp_with_keepalive_values("")
                .await
                .get_keepalive_values(),
            (None, None)
        );
    }

    /* Note: body tests are covered in server.rs */

    #[tokio::test]
    async fn test_http10_response_with_transfer_encoding_disables_keepalive() {
        // Transfer-Encoding in HTTP/1.0 response requires connection close
        let input = b"HTTP/1.0 200 OK\r\n\
Transfer-Encoding: chunked\r\n\
Connection: keep-alive\r\n\
\r\n\
5\r\n\
hello\r\n\
0\r\n\
\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_response().await.unwrap();
        http_stream.respect_keepalive();

        // Keepalive must be disabled even if Connection: keep-alive header present
        assert!(!http_stream.will_keepalive());
        assert_eq!(http_stream.keepalive_timeout, KeepaliveStatus::Off);
    }

    #[tokio::test]
    async fn test_http11_response_with_transfer_encoding_allows_keepalive() {
        // HTTP/1.1 with Transfer-Encoding should allow keepalive (contrast with HTTP/1.0)
        let input = b"HTTP/1.1 200 OK\r\n\
Transfer-Encoding: chunked\r\n\
\r\n\
5\r\n\
hello\r\n\
0\r\n\
\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_response().await.unwrap();
        http_stream.respect_keepalive();

        // HTTP/1.1 should allow keepalive by default
        assert!(http_stream.will_keepalive());
    }

    #[tokio::test]
    async fn test_response_multiple_transfer_encoding_headers() {
        init_log();
        // Multiple TE headers should be treated as comma-separated
        let input = b"HTTP/1.1 200 OK\r\n\
Transfer-Encoding: gzip\r\n\
Transfer-Encoding: chunked\r\n\
\r\n\
5\r\n\
hello\r\n\
0\r\n\
\r\n";

        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_response().await.unwrap();

        // Should correctly identify chunked encoding from last header
        assert!(http_stream.is_chunked_encoding());

        // Verify body can be read correctly
        let body = http_stream.read_body_bytes().await.unwrap();
        assert_eq!(body.as_ref().unwrap().as_ref(), b"hello");
        http_stream.finish_body().await.unwrap();
    }

    #[tokio::test]
    async fn test_response_multiple_te_headers_chunked_not_last() {
        init_log();
        // Chunked in first header but not last - should NOT be chunked
        let input = b"HTTP/1.1 200 OK\r\n\
Transfer-Encoding: chunked\r\n\
Transfer-Encoding: identity\r\n\
Content-Length: 5\r\n\
\r\n\
hello";

        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_response().await.unwrap();

        // Should NOT be chunked - identity is final encoding
        assert!(!http_stream.is_chunked_encoding());
    }

    #[test]
    fn test_is_chunked_encoding_before_response() {
        // Test that is_chunked_encoding returns false when no response received yet
        let mock_io = Builder::new().build();
        let http_stream = HttpSession::new(Box::new(mock_io));

        // Should return false when no response header exists yet
        assert!(!http_stream.is_chunked_encoding());
    }

    #[tokio::test]
    async fn write_request_body_implicit_zero_content_length() {
        init_log();
        let header = b"POST /test HTTP/1.1\r\n\r\n";
        let mock_io = Builder::new().write(&header[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));

        let new_request = RequestHeader::build("POST", b"/test", None).unwrap();
        http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();

        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(0, 0)
        );
    }

    #[tokio::test]
    async fn write_request_body_with_content_length() {
        init_log();
        let header = b"POST /test HTTP/1.1\r\nContent-Length: 3\r\n\r\n";
        let body = b"abc";
        let mock_io = Builder::new().write(&header[..]).write(&body[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));

        let mut new_request = RequestHeader::build("POST", b"/test", None).unwrap();
        new_request.insert_header("Content-Length", "3").unwrap();
        http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();

        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(3, 0)
        );

        http_stream.write_body(body).await.unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(3, 3)
        );
    }

    #[tokio::test]
    async fn close_delimited_response_explicitly_disables_keepalive() {
        init_log();
        // Defense-in-depth: if we read a close-delimited response body,
        // keepalive should be disabled
        let wire = b"GET / HTTP/1.1\r\n\r\n";
        let input_header = b"HTTP/1.1 200 OK\r\n\r\n";
        let input_body = b"abc";
        let input_close = b""; // simulating close
        let mock_io = Builder::new()
            .write(&wire[..])
            .read(&input_header[..])
            .read(&input_body[..])
            .read(&input_close[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));

        // Write request first
        let new_request = RequestHeader::build("GET", b"/", None).unwrap();
        http_stream
            .write_request_header(Box::new(new_request))
            .await
            .unwrap();

        // Read response
        http_stream.read_response().await.unwrap();

        // Read the body (this will initialize the body reader)
        http_stream.read_body_ref().await.unwrap();

        // Body reader should be in UntilClose mode (close-delimited response)
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::UntilClose(3)
        );

        let res2 = http_stream.read_body_ref().await.unwrap();
        assert!(res2.is_none()); // EOF

        // Body should now be Complete
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));

        http_stream.respect_keepalive();
        assert!(!http_stream.will_keepalive());
    }
}

#[cfg(test)]
mod test_sync {
    use super::*;
    use log::error;

    #[test]
    fn test_request_to_wire() {
        let mut new_request = RequestHeader::build("GET", b"/", None).unwrap();
        new_request.insert_header("Foo", "Bar").unwrap();
        let wire = http_req_header_to_wire(&new_request).unwrap();
        let mut headers = [httparse::EMPTY_HEADER; 128];
        let mut req = httparse::Request::new(&mut headers);
        let result = req.parse(wire.as_ref());
        match result {
            Ok(_) => {}
            Err(e) => error!("{:?}", e),
        }
        assert!(result.unwrap().is_complete());
        // FIXME: the order is not guaranteed
        assert_eq!("/", req.path.unwrap());
        assert_eq!(b"Foo", headers[0].name.as_bytes());
        assert_eq!(b"Bar", headers[0].value);
    }
}


================================================
FILE: pingora-core/src/protocols/http/v1/common.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Common functions and constants

use http::{header, HeaderValue};
use log::warn;
use pingora_error::{Error, ErrorType::*, Result};
use pingora_http::{HMap, RequestHeader, ResponseHeader};
use std::str;
use std::time::Duration;

use super::body::BodyWriter;
use crate::utils::KVRef;

pub(super) const MAX_HEADERS: usize = 256;

pub(super) const INIT_HEADER_BUF_SIZE: usize = 4096;
pub(super) const MAX_HEADER_SIZE: usize = 1048575;

pub(crate) const BODY_BUF_LIMIT: usize = 1024 * 64;

pub const CRLF: &[u8; 2] = b"\r\n";
pub const HEADER_KV_DELIMITER: &[u8; 2] = b": ";

pub(super) enum HeaderParseState {
    Complete(usize),
    Partial,
    Invalid(httparse::Error),
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) enum KeepaliveStatus {
    Timeout(Duration),
    Infinite,
    Off,
}

struct ConnectionValue {
    keep_alive: bool,
    upgrade: bool,
    close: bool,
}

impl ConnectionValue {
    fn new() -> Self {
        ConnectionValue {
            keep_alive: false,
            upgrade: false,
            close: false,
        }
    }

    fn close(mut self) -> Self {
        self.close = true;
        self
    }
    fn upgrade(mut self) -> Self {
        self.upgrade = true;
        self
    }
    fn keep_alive(mut self) -> Self {
        self.keep_alive = true;
        self
    }
}

fn parse_connection_header(value: &[u8]) -> ConnectionValue {
    // only parse keep-alive, close, and upgrade tokens
    // https://www.rfc-editor.org/rfc/rfc9110.html#section-7.6.1

    const KEEP_ALIVE: &str = "keep-alive";
    const CLOSE: &str = "close";
    const UPGRADE: &str = "upgrade";

    // fast path
    if value.eq_ignore_ascii_case(CLOSE.as_bytes()) {
        ConnectionValue::new().close()
    } else if value.eq_ignore_ascii_case(KEEP_ALIVE.as_bytes()) {
        ConnectionValue::new().keep_alive()
    } else if value.eq_ignore_ascii_case(UPGRADE.as_bytes()) {
        ConnectionValue::new().upgrade()
    } else {
        // slow path, parse the connection value
        let mut close = false;
        let mut upgrade = false;
        let value = str::from_utf8(value).unwrap_or("");
        for token in value
            .split(',')
            .map(|s| s.trim())
            .filter(|&x| !x.is_empty())
        {
            if token.eq_ignore_ascii_case(CLOSE) {
                close = true;
            } else if token.eq_ignore_ascii_case(UPGRADE) {
                upgrade = true;
            }
            if upgrade && close {
                return ConnectionValue::new().upgrade().close();
            }
        }
        if close {
            ConnectionValue::new().close()
        } else if upgrade {
            ConnectionValue::new().upgrade()
        } else {
            ConnectionValue::new()
        }
    }
}

pub(crate) fn init_body_writer_comm(body_writer: &mut BodyWriter, headers: &HMap) {
    if is_chunked_encoding_from_headers(headers) {
        // transfer-encoding takes priority over content-length
        body_writer.init_chunked();
    } else {
        let content_length = header_value_content_length(headers.get(http::header::CONTENT_LENGTH));
        match content_length {
            Some(length) => {
                body_writer.init_content_length(length);
            }
            None => {
                /* TODO: 1. connection: keepalive cannot be used,
                2. mark connection must be closed */
                body_writer.init_close_delimited();
            }
        }
    }
}

/// Find the last comma-separated token in a Transfer-Encoding header value.
/// Takes the literal last token after the last comma, even if empty.
#[inline]
fn find_last_te_token(bytes: &[u8]) -> &[u8] {
    let last_token = bytes
        .iter()
        .rposition(|&b| b == b',')
        .map(|pos| &bytes[pos + 1..])
        .unwrap_or(bytes);

    last_token.trim_ascii()
}

/// Check if chunked encoding is the final encoding across all transfer-encoding headers
pub(crate) fn is_chunked_encoding_from_headers(headers: &HMap) -> bool {
    // Get the last Transfer-Encoding header value
    let last_te = headers
        .get_all(http::header::TRANSFER_ENCODING)
        .into_iter()
        .next_back();

    let Some(last_header_value) = last_te else {
        return false;
    };

    let bytes = last_header_value.as_bytes();

    // Fast path: exact match for "chunked"
    if bytes.eq_ignore_ascii_case(b"chunked") {
        return true;
    }

    // Slow path: parse comma-separated values
    find_last_te_token(bytes).eq_ignore_ascii_case(b"chunked")
}

pub fn is_upgrade_req(req: &RequestHeader) -> bool {
    req.version == http::Version::HTTP_11 && req.headers.get(header::UPGRADE).is_some()
}

pub fn is_expect_continue_req(req: &RequestHeader) -> bool {
    req.version == http::Version::HTTP_11
        // https://www.rfc-editor.org/rfc/rfc9110#section-10.1.1
        && req.headers.get(header::EXPECT).is_some_and(|v| {
            v.as_bytes().eq_ignore_ascii_case(b"100-continue")
        })
}

// Unlike the upgrade check on request, this function doesn't check the Upgrade or Connection header
// because when seeing 101, we assume the server accepts to switch protocol.
// In reality it is not common that some servers don't send all the required headers to establish
// websocket connections.
pub fn is_upgrade_resp(header: &ResponseHeader) -> bool {
    header.status == 101 && header.version == http::Version::HTTP_11
}

#[inline]
pub fn header_value_content_length(
    header_value: Option<&http::header::HeaderValue>,
) -> Option<usize> {
    match header_value {
        Some(value) => buf_to_content_length(Some(value.as_bytes())).ok().flatten(),
        None => None,
    }
}

#[inline]
pub(super) fn buf_to_content_length(header_value: Option<&[u8]>) -> Result<Option<usize>> {
    match header_value {
        Some(buf) => {
            match str::from_utf8(buf) {
                // check valid string
                Ok(str_cl_value) => match str_cl_value.parse::<i64>() {
                    Ok(cl_length) => {
                        if cl_length >= 0 {
                            Ok(Some(cl_length as usize))
                        } else {
                            warn!("negative content-length header value {cl_length}");
                            Error::e_explain(
                                InvalidHTTPHeader,
                                format!("negative Content-Length header value: {cl_length}"),
                            )
                        }
                    }
                    Err(_) => {
                        warn!("invalid content-length header value {str_cl_value}");
                        Error::e_explain(
                            InvalidHTTPHeader,
                            format!("invalid Content-Length header value: {str_cl_value}"),
                        )
                    }
                },
                Err(_) => {
                    warn!("invalid content-length header encoding");
                    Error::e_explain(InvalidHTTPHeader, "invalid Content-Length header encoding")
                }
            }
        }
        None => Ok(None),
    }
}

#[inline]
pub(super) fn is_buf_keepalive(header_value: Option<&HeaderValue>) -> Option<bool> {
    header_value.and_then(|value| {
        let value = parse_connection_header(value.as_bytes());
        if value.keep_alive {
            Some(true)
        } else if value.close {
            Some(false)
        } else {
            None
        }
    })
}

#[inline]
pub(super) fn populate_headers(
    base: usize,
    header_ref: &mut Vec<KVRef>,
    headers: &[httparse::Header],
) -> usize {
    let mut used_header_index = 0;
    for header in headers.iter() {
        if !header.name.is_empty() {
            header_ref.push(KVRef::new(
                header.name.as_ptr() as usize - base,
                header.name.len(),
                header.value.as_ptr() as usize - base,
                header.value.len(),
            ));
            used_header_index += 1;
        }
    }
    used_header_index
}

// RFC 7230:
// If a message is received without Transfer-Encoding and with
// either multiple Content-Length header fields having differing
// field-values or a single Content-Length header field having an
// invalid value, then the message framing is invalid and the
// recipient MUST treat it as an unrecoverable error.
pub(super) fn check_dup_content_length(headers: &HMap) -> Result<()> {
    if headers.get(header::TRANSFER_ENCODING).is_some() {
        // If TE header, ignore CL
        return Ok(());
    }
    let mut cls = headers.get_all(header::CONTENT_LENGTH).into_iter();
    if cls.next().is_none() {
        // no CL header is fine.
        return Ok(());
    }
    if cls.next().is_some() {
        // duplicated CL is bad
        return crate::Error::e_explain(
            crate::ErrorType::InvalidHTTPHeader,
            "duplicated Content-Length header",
        );
    }
    Ok(())
}

#[cfg(test)]
mod test {
    use super::*;
    use http::{
        header::{CONTENT_LENGTH, TRANSFER_ENCODING},
        StatusCode, Version,
    };
    use rstest::rstest;

    #[test]
    fn test_check_dup_content_length() {
        let mut headers = HMap::new();

        assert!(check_dup_content_length(&headers).is_ok());

        headers.append(CONTENT_LENGTH, "1".try_into().unwrap());
        assert!(check_dup_content_length(&headers).is_ok());

        headers.append(CONTENT_LENGTH, "2".try_into().unwrap());
        assert!(check_dup_content_length(&headers).is_err());

        headers.append(TRANSFER_ENCODING, "chunkeds".try_into().unwrap());
        assert!(check_dup_content_length(&headers).is_ok());
    }

    #[test]
    fn test_is_upgrade_resp() {
        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(Version::HTTP_11);
        response.insert_header("Upgrade", "websocket").unwrap();
        response.insert_header("Connection", "upgrade").unwrap();
        assert!(is_upgrade_resp(&response));

        // wrong http version
        response.set_version(Version::HTTP_10);
        response.insert_header("Upgrade", "websocket").unwrap();
        response.insert_header("Connection", "upgrade").unwrap();
        assert!(!is_upgrade_resp(&response));

        // not 101
        response.set_status(StatusCode::OK).unwrap();
        response.set_version(Version::HTTP_11);
        assert!(!is_upgrade_resp(&response));
    }

    #[test]
    fn test_is_chunked_encoding_from_headers_empty() {
        let empty_headers = HMap::new();
        assert!(!is_chunked_encoding_from_headers(&empty_headers));
    }

    #[rstest]
    #[case::single_chunked("chunked", true)]
    #[case::comma_separated_final("identity, chunked", true)]
    #[case::whitespace_around("  chunked  ", true)]
    #[case::empty_elements_before(", , , chunked", true)]
    #[case::only_identity("identity", false)]
    #[case::trailing_comma("chunked, ", false)]
    #[case::multiple_trailing_commas("chunked, , ", false)]
    #[case::empty_value("", false)]
    #[case::whitespace_only("   ", false)]
    fn test_is_chunked_encoding_single_header(#[case] value: &str, #[case] expected: bool) {
        let mut headers = HMap::new();
        headers.insert(TRANSFER_ENCODING, value.try_into().unwrap());
        assert_eq!(is_chunked_encoding_from_headers(&headers), expected);
    }

    #[rstest]
    #[case::two_headers_chunked_last(&["identity", "chunked"], true)]
    #[case::three_headers_chunked_last(&["gzip", "identity", "chunked"], true)]
    #[case::last_has_comma_separated(&["gzip", "identity, chunked"], true)]
    #[case::whitespace_in_last(&["gzip", "  chunked  "], true)]
    #[case::two_headers_no_chunked(&["identity", "gzip"], false)]
    #[case::chunked_not_last(&["chunked", "identity"], false)]
    #[case::last_has_chunked_not_final(&["gzip", "chunked, identity"], false)]
    #[case::chunked_overridden(&["chunked", "identity, gzip"], false)]
    #[case::trailing_comma_in_last(&["gzip", "chunked, "], false)]
    fn test_is_chunked_encoding_multiple_headers(#[case] values: &[&str], #[case] expected: bool) {
        let mut headers = HMap::new();
        for value in values {
            headers.append(TRANSFER_ENCODING, (*value).try_into().unwrap());
        }
        assert_eq!(is_chunked_encoding_from_headers(&headers), expected);
    }
}


================================================
FILE: pingora-core/src/protocols/http/v1/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP/1.x implementation

pub(crate) mod body;
pub mod client;
pub mod common;
pub mod server;


================================================
FILE: pingora-core/src/protocols/http/v1/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP/1.x server session

use bstr::ByteSlice;
use bytes::Bytes;
use bytes::{BufMut, BytesMut};
use http::header::{CONTENT_LENGTH, TRANSFER_ENCODING};
use http::HeaderValue;
use http::{header, header::AsHeaderName, Method, Version};
use log::{debug, trace, warn};
use once_cell::sync::Lazy;
use percent_encoding::{percent_encode, AsciiSet, CONTROLS};
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use pingora_http::{IntoCaseHeaderName, RequestHeader, ResponseHeader};
use pingora_timeout::timeout;
use regex::bytes::Regex;
use std::time::Duration;
use tokio::io::{AsyncReadExt, AsyncWriteExt};

use super::body::{BodyReader, BodyWriter};
use super::common::*;
use crate::protocols::http::{body_buffer::FixedBuffer, date, HttpTask};
use crate::protocols::{Digest, SocketAddr, Stream};
use crate::utils::{BufRef, KVRef};

/// The HTTP 1.x server session
pub struct HttpSession {
    underlying_stream: Stream,
    /// The buf that holds the raw request header + possibly a portion of request body
    /// Request body can appear here because they could arrive with the same read() that
    /// sends the request header.
    buf: Bytes,
    /// A slice reference to `buf` which points to the exact range of request header
    raw_header: Option<BufRef>,
    /// A slice reference to `buf` which points to the range of a portion of request body if any
    preread_body: Option<BufRef>,
    /// A state machine to track how to read the request body
    body_reader: BodyReader,
    /// A state machine to track how to write the response body
    body_writer: BodyWriter,
    /// An internal buffer to buf multiple body writes to reduce the underlying syscalls
    body_write_buf: BytesMut,
    /// Track how many application (not on the wire) body bytes already sent
    body_bytes_sent: usize,
    /// Track how many application (not on the wire) body bytes already read
    body_bytes_read: usize,
    /// Whether to update headers like connection, Date
    update_resp_headers: bool,
    /// timeouts:
    keepalive_timeout: KeepaliveStatus,
    read_timeout: Option<Duration>,
    write_timeout: Option<Duration>,
    /// How long to wait to make downstream session reusable, if body needs to be drained.
    total_drain_timeout: Option<Duration>,
    /// A copy of the response that is already written to the client
    response_written: Option<Box<ResponseHeader>>,
    /// The parsed request header
    request_header: Option<Box<RequestHeader>>,
    /// An internal buffer that holds a copy of the request body up to a certain size
    retry_buffer: Option<FixedBuffer>,
    /// Whether this session is an upgraded session. This flag is calculated when sending the
    /// response header to the client.
    upgraded: bool,
    /// Digest to track underlying connection metrics
    digest: Box<Digest>,
    /// Minimum send rate to the client
    min_send_rate: Option<usize>,
    /// When this is enabled informational response headers will not be proxied downstream
    ignore_info_resp: bool,
    /// Disable keepalive if response is sent before downstream body is finished
    close_on_response_before_downstream_finish: bool,

    /// Number of times the upstream connection associated with this session can be reused
    /// after this session ends
    keepalive_reuses_remaining: Option<u32>,
}

impl HttpSession {
    /// Create a new http server session from an established (TCP or TLS) [`Stream`].
    /// The created session needs to call [`Self::read_request()`] first before performing
    /// any other operations.
    pub fn new(underlying_stream: Stream) -> Self {
        // TODO: maybe we should put digest in the connection itself
        let digest = Box::new(Digest {
            ssl_digest: underlying_stream.get_ssl_digest(),
            timing_digest: underlying_stream.get_timing_digest(),
            proxy_digest: underlying_stream.get_proxy_digest(),
            socket_digest: underlying_stream.get_socket_digest(),
        });

        HttpSession {
            underlying_stream,
            buf: Bytes::new(), // zero size, with be replaced by parsed header later
            raw_header: None,
            preread_body: None,
            body_reader: BodyReader::new(false),
            body_writer: BodyWriter::new(),
            body_write_buf: BytesMut::new(),
            keepalive_timeout: KeepaliveStatus::Off,
            update_resp_headers: true,
            response_written: None,
            request_header: None,
            read_timeout: Some(Duration::from_secs(60)),
            write_timeout: None,
            total_drain_timeout: None,
            body_bytes_sent: 0,
            body_bytes_read: 0,
            retry_buffer: None,
            upgraded: false,
            digest,
            min_send_rate: None,
            ignore_info_resp: false,
            // default on to avoid rejecting requests after body as pipelined
            close_on_response_before_downstream_finish: true,
            keepalive_reuses_remaining: None,
        }
    }

    /// Read the request header. Return `Ok(Some(n))` where the read and parsing are successful.
    /// Return `Ok(None)` when the client closed the connection without sending any data, which
    /// is common on a reused connection.
    pub async fn read_request(&mut self) -> Result<Option<usize>> {
        const MAX_ERR_BUF_LEN: usize = 2048;

        self.buf.clear();
        let mut buf = BytesMut::with_capacity(INIT_HEADER_BUF_SIZE);
        let mut already_read: usize = 0;
        loop {
            if already_read > MAX_HEADER_SIZE {
                /* NOTE: this check only blocks second read. The first large read is allowed
                since the buf is already allocated. The goal is to avoid slowly bloating
                this buffer */
                return Error::e_explain(
                    InvalidHTTPHeader,
                    format!("Request header larger than {MAX_HEADER_SIZE}"),
                );
            }

            let read_result = {
                let read_event = self.underlying_stream.read_buf(&mut buf);
                match self.keepalive_timeout {
                    KeepaliveStatus::Timeout(d) => match timeout(d, read_event).await {
                        Ok(res) => res,
                        Err(e) => {
                            debug!("keepalive timeout {d:?} reached, {e}");
                            return Ok(None);
                        }
                    },
                    KeepaliveStatus::Infinite => {
                        // FIXME: this should only apply to reads between requests
                        read_event.await
                    }
                    KeepaliveStatus::Off => match self.read_timeout {
                        Some(t) => match timeout(t, read_event).await {
                            Ok(res) => res,
                            Err(e) => {
                                debug!("read timeout {t:?} reached, {e}");
                                return Error::e_explain(ReadTimedout, format!("timeout: {t:?}"));
                            }
                        },
                        None => read_event.await,
                    },
                }
            };
            let n = match read_result {
                Ok(n_read) => {
                    if n_read == 0 {
                        if already_read > 0 {
                            return Error::e_explain(
                                ConnectionClosed,
                                format!(
                                    "while reading request headers, bytes already read: {}",
                                    already_read
                                ),
                            );
                        } else {
                            /* common when client decides to close a keepalived session */
                            debug!("Client prematurely closed connection with 0 byte sent");
                            return Ok(None);
                        }
                    }
                    n_read
                }

                Err(e) => {
                    if already_read > 0 {
                        return Error::e_because(ReadError, "while reading request headers", e);
                    }
                    /* nothing harmful since we have not ready any thing yet */
                    return Ok(None);
                }
            };
            already_read += n;

            // Use loop as GOTO to retry escaped request buffer, not a real loop
            loop {
                let mut headers = [httparse::EMPTY_HEADER; MAX_HEADERS];
                let mut req = httparse::Request::new(&mut headers);
                let parsed = parse_req_buffer(&mut req, &buf);
                match parsed {
                    HeaderParseState::Complete(s) => {
                        self.raw_header = Some(BufRef(0, s));
                        self.preread_body = Some(BufRef(s, already_read));

                        // We have the header name and values we parsed to be just 0 copy Bytes
                        // referencing the original buf. That requires we convert the buf from
                        // BytesMut to Bytes. But `req` holds a reference to `buf`. So we use the
                        // `KVRef`s to record the offset of each piece of data, drop `req`, convert
                        // buf, the do the 0 copy update
                        let base = buf.as_ptr() as usize;
                        let mut header_refs = Vec::<KVRef>::with_capacity(req.headers.len());
                        // Note: req.headers has the correct number of headers
                        // while header_refs doesn't as it is still empty
                        let _num_headers = populate_headers(base, &mut header_refs, req.headers);

                        let mut request_header = Box::new(RequestHeader::build(
                            req.method.unwrap_or(""),
                            // we path httparse to allow unsafe bytes in the str
                            req.path.unwrap_or("").as_bytes(),
                            Some(req.headers.len()),
                        )?);

                        request_header.set_version(match req.version {
                            Some(1) => Version::HTTP_11,
                            Some(0) => Version::HTTP_10,
                            _ => Version::HTTP_09,
                        });

                        let buf = buf.freeze();

                        for header in header_refs {
                            let header_name = header.get_name_bytes(&buf);
                            let header_name = header_name.into_case_header_name();
                            let value_bytes = header.get_value_bytes(&buf);
                            // safe because this is from what we parsed
                            let header_value = unsafe {
                                http::HeaderValue::from_maybe_shared_unchecked(value_bytes)
                            };

                            request_header
                                .append_header(header_name, header_value)
                                .or_err(InvalidHTTPHeader, "while parsing request header")?;
                        }

                        let contains_transfer_encoding =
                            request_header.headers.contains_key(TRANSFER_ENCODING);
                        let contains_content_length =
                            request_header.headers.contains_key(CONTENT_LENGTH);

                        // Transfer encoding overrides content length, so when
                        // both are present, we can remove content length. This
                        // is per https://datatracker.ietf.org/doc/html/rfc9112#section-6.3
                        //
                        // RFC 9112 Section 6.1 (https://datatracker.ietf.org/doc/html/rfc9112#section-6.1-15)
                        // also requires us to disable keepalive when both headers are present.
                        let has_both_te_and_cl =
                            contains_content_length && contains_transfer_encoding;
                        if has_both_te_and_cl {
                            request_header.remove_header(&CONTENT_LENGTH);
                        }

                        self.buf = buf;
                        self.request_header = Some(request_header);

                        self.body_reader.reinit();
                        self.response_written = None;
                        self.respect_keepalive();

                        // Disable keepalive if both Transfer-Encoding and Content-Length were present
                        if has_both_te_and_cl {
                            self.set_keepalive(None);
                        }
                        self.validate_request()?;

                        return Ok(Some(s));
                    }
                    HeaderParseState::Partial => {
                        break; /* continue the read loop */
                    }
                    HeaderParseState::Invalid(e) => match e {
                        httparse::Error::Token | httparse::Error::Version => {
                            // try to escape URI
                            if let Some(new_buf) = escape_illegal_request_line(&buf) {
                                buf = new_buf;
                                already_read = buf.len();
                            } else {
                                debug!("Invalid request header from {:?}", self.underlying_stream);
                                buf.truncate(MAX_ERR_BUF_LEN);
                                return Error::e_because(
                                    InvalidHTTPHeader,
                                    format!("buf: {}", buf.escape_ascii()),
                                    e,
                                );
                            }
                        }
                        _ => {
                            debug!("Invalid request header from {:?}", self.underlying_stream);
                            buf.truncate(MAX_ERR_BUF_LEN);
                            return Error::e_because(
                                InvalidHTTPHeader,
                                format!("buf: {:?}", buf.as_bstr()),
                                e,
                            );
                        }
                    },
                }
            }
        }
    }

    /// Validate the request header read. This function must be called after the request header
    /// read.
    /// # Panics
    /// this function and most other functions will panic if called before [`Self::read_request()`]
    pub fn validate_request(&self) -> Result<()> {
        let req_header = self.req_header();

        // ad-hoc checks
        super::common::check_dup_content_length(&req_header.headers)?;

        if req_header.headers.contains_key(TRANSFER_ENCODING) {
            // Per [RFC 9112 Section 6.1-16](https://datatracker.ietf.org/doc/html/rfc9112#section-6.1-16),
            // HTTP/1.0 requests with Transfer-Encoding MUST be treated as having faulty framing.
            // We reject with 400 Bad Request and close the connection.
            if req_header.version == http::Version::HTTP_10 {
                return Error::e_explain(
                    InvalidHTTPHeader,
                    "HTTP/1.0 requests cannot include Transfer-Encoding header",
                );
            }
            // If chunked is not the final Transfer-Encoding, reject request
            // See https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.4.3
            if !self.is_chunked_encoding() {
                return Error::e_explain(InvalidHTTPHeader, "non-chunked final Transfer-Encoding");
            }
        }
        // validate content-length value if present to avoid ambiguous framing
        self.get_content_length()?;

        Ok(())
    }

    /// Return a reference of the `RequestHeader` this session read
    /// # Panics
    /// this function and most other functions will panic if called before [`Self::read_request()`]
    pub fn req_header(&self) -> &RequestHeader {
        self.request_header
            .as_ref()
            .expect("Request header is not read yet")
    }

    /// Return a mutable reference of the `RequestHeader` this session read
    /// # Panics
    /// this function and most other functions will panic if called before [`Self::read_request()`]
    pub fn req_header_mut(&mut self) -> &mut RequestHeader {
        self.request_header
            .as_mut()
            .expect("Request header is not read yet")
    }

    /// Get the header value for the given header name
    /// If there are multiple headers under the same name, the first one will be returned
    /// Use `self.req_header().header.get_all(name)` to get all the headers under the same name
    pub fn get_header(&self, name: impl AsHeaderName) -> Option<&HeaderValue> {
        self.request_header
            .as_ref()
            .and_then(|h| h.headers.get(name))
    }

    /// Return the method of this request. None if the request is not read yet.
    pub(crate) fn get_method(&self) -> Option<&http::Method> {
        self.request_header.as_ref().map(|r| &r.method)
    }

    /// Return the path of the request (i.e., the `/hello?1` of `GET /hello?1 HTTP1.1`)
    /// An empty slice will be used if there is no path or the request is not read yet
    pub(crate) fn get_path(&self) -> &[u8] {
        self.request_header.as_ref().map_or(b"", |r| r.raw_path())
    }

    /// Return the host header of the request. An empty slice will be used if there is no host header
    pub(crate) fn get_host(&self) -> &[u8] {
        self.request_header
            .as_ref()
            .and_then(|h| h.headers.get(header::HOST))
            .map_or(b"", |h| h.as_bytes())
    }

    /// Return a string `$METHOD $PATH, Host: $HOST`. Mostly for logging and debug purpose
    pub fn request_summary(&self) -> String {
        format!(
            "{} {}, Host: {}",
            self.get_method().map_or("-", |r| r.as_str()),
            String::from_utf8_lossy(self.get_path()),
            String::from_utf8_lossy(self.get_host())
        )
    }

    /// Is the request a upgrade request
    pub fn is_upgrade_req(&self) -> bool {
        match self.request_header.as_deref() {
            Some(req) => is_upgrade_req(req),
            None => false,
        }
    }

    /// Get the request header as raw bytes, `b""` when the header doesn't exist
    pub fn get_header_bytes(&self, name: impl AsHeaderName) -> &[u8] {
        self.get_header(name).map_or(b"", |v| v.as_bytes())
    }

    /// Read the request body. `Ok(None)` when there is no (more) body to read.
    pub async fn read_body_bytes(&mut self) -> Result<Option<Bytes>> {
        let read = self.read_body().await?;
        Ok(read.map(|b| {
            let bytes = Bytes::copy_from_slice(self.get_body(&b));
            self.body_bytes_read += bytes.len();
            if let Some(buffer) = self.retry_buffer.as_mut() {
                buffer.write_to_buffer(&bytes);
            }
            bytes
        }))
    }

    async fn do_read_body(&mut self) -> Result<Option<BufRef>> {
        self.init_body_reader();
        self.body_reader
            .read_body(&mut self.underlying_stream)
            .await
    }

    /// Read the body into the internal buffer
    async fn read_body(&mut self) -> Result<Option<BufRef>> {
        match self.read_timeout {
            Some(t) => match timeout(t, self.do_read_body()).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(ReadTimedout, format!("reading body, timeout: {t:?}")),
            },
            None => self.do_read_body().await,
        }
    }

    async fn do_drain_request_body(&mut self) -> Result<()> {
        loop {
            match self.read_body_bytes().await {
                Ok(Some(_)) => { /* continue to drain */ }
                Ok(None) => return Ok(()), // done
                Err(e) => return Err(e),
            }
        }
    }

    /// Drain the request body. `Ok(())` when there is no (more) body to read.
    pub async fn drain_request_body(&mut self) -> Result<()> {
        if self.is_body_done() {
            return Ok(());
        }
        match self.total_drain_timeout {
            Some(t) => match timeout(t, self.do_drain_request_body()).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(ReadTimedout, format!("draining body, timeout: {t:?}")),
            },
            None => self.do_drain_request_body().await,
        }
    }

    /// Whether there is no (more) body to be read.
    pub fn is_body_done(&mut self) -> bool {
        self.init_body_reader();
        self.body_reader.body_done()
    }

    /// Whether the request has an empty body
    /// Because HTTP 1.1 clients have to send either `Content-Length` or `Transfer-Encoding` in order
    /// to signal the server that it will send the body, this function returns accurate results even
    /// only when the request header is just read.
    pub fn is_body_empty(&mut self) -> bool {
        self.init_body_reader();
        self.body_reader.body_empty()
    }

    /// Write the response header to the client.
    /// This function can be called more than once to send 1xx informational headers excluding 101.
    pub async fn write_response_header(&mut self, mut header: Box<ResponseHeader>) -> Result<()> {
        if header.status.is_informational() && self.ignore_info_resp(header.status.into()) {
            debug!("ignoring informational headers");
            return Ok(());
        }

        if let Some(resp) = self.response_written.as_ref() {
            if !resp.status.is_informational() || self.upgraded {
                warn!("Respond header is already sent, cannot send again");
                return Ok(());
            }
        }

        // if body unfinished, or request header was not finished reading
        if self.close_on_response_before_downstream_finish
            && (self.request_header.is_none() || !self.is_body_done())
        {
            debug!("set connection close before downstream finish");
            self.set_keepalive(None);
        }

        // no need to add these headers to 1xx responses
        if !header.status.is_informational() && self.update_resp_headers {
            /* update headers */
            header.insert_header(header::DATE, date::get_cached_date())?;

            // TODO: make these lazy static
            let connection_value = if self.will_keepalive() {
                "keep-alive"
            } else {
                "close"
            };
            header.insert_header(header::CONNECTION, connection_value)?;
        }

        if header.status == 101 {
            // make sure the connection is closed at the end when 101/upgrade is used
            self.set_keepalive(None);
        }

        // Allow informational header (excluding 101) to pass through without affecting the state
        // of the request
        if header.status == 101 || !header.status.is_informational() {
            // reset request body to done for incomplete upgrade handshakes
            if let Some(upgrade_ok) = self.is_upgrade(&header) {
                if upgrade_ok {
                    debug!("ok upgrade handshake");
                    // For ws we use HTTP1_0 do_read_body_until_closed
                    //
                    // On ws close the initiator sends a close frame and
                    // then waits for a response from the peer, once it receives
                    // a response it closes the conn. After receiving a
                    // control frame indicating the connection should be closed,
                    // a peer discards any further data received.
                    // https://www.rfc-editor.org/rfc/rfc6455#section-1.4
                    self.upgraded = true;
                    // Now that the upgrade was successful, we need to change
                    // how we interpret the rest of the body as pass-through.
                    if self.body_reader.need_init() {
                        self.init_body_reader();
                    } else {
                        // already initialized
                        // immediately start reading the rest of the body as upgraded
                        // (in practice most upgraded requests shouldn't have any body)
                        //
                        // TODO: https://datatracker.ietf.org/doc/html/rfc9110#name-upgrade
                        // the most spec-compliant behavior is to switch interpretation
                        // after sending the former body,
                        // we immediately switch interpretation to match nginx
                        self.body_reader.convert_to_close_delimited();
                    }
                } else {
                    // this was a request that requested Upgrade,
                    // but upstream did not comply
                    debug!("bad upgrade handshake!");
                    // continue to read body as-is, this is now just a regular request
                }
            }
            self.init_body_writer(&header);
        }

        // Defense-in-depth: if response body is close-delimited, mark session
        // as un-reusable
        if self.body_writer.is_close_delimited() {
            self.set_keepalive(None);
        }

        // Don't have to flush response with content length because it is less
        // likely to be real time communication. So do flush when
        // 1.1xx response: client needs to see it before the rest of response
        // 2.No content length: the response could be generated in real time
        let flush = header.status.is_informational()
            || header.headers.get(header::CONTENT_LENGTH).is_none();

        let mut write_buf = BytesMut::with_capacity(INIT_HEADER_BUF_SIZE);
        http_resp_header_to_buf(&header, &mut write_buf).unwrap();
        match self.underlying_stream.write_all(&write_buf).await {
            Ok(()) => {
                // flush the stream if 1xx header or there is no response body
                if flush || self.body_writer.finished() {
                    self.underlying_stream
                        .flush()
                        .await
                        .or_err(WriteError, "flushing response header")?;
                }
                self.response_written = Some(header);
                self.body_bytes_sent += write_buf.len();
                Ok(())
            }
            Err(e) => Error::e_because(WriteError, "writing response header", e),
        }
    }

    /// Return the response header if it is already sent.
    pub fn response_written(&self) -> Option<&ResponseHeader> {
        self.response_written.as_deref()
    }

    /// `Some(true)` if the this is a successful upgrade
    /// `Some(false)` if the request is an upgrade but the response refuses it
    /// `None` if the request is not an upgrade.
    pub fn is_upgrade(&self, header: &ResponseHeader) -> Option<bool> {
        if self.is_upgrade_req() {
            Some(is_upgrade_resp(header))
        } else {
            None
        }
    }

    /// Was this request successfully turned into an upgraded connection?
    ///
    /// Both the request had to have been an `Upgrade` request
    /// and the response had to have been a `101 Switching Protocols`.
    pub fn was_upgraded(&self) -> bool {
        self.upgraded
    }

    fn set_keepalive(&mut self, seconds: Option<u64>) {
        match seconds {
            Some(sec) => {
                if sec > 0 {
                    self.keepalive_timeout = KeepaliveStatus::Timeout(Duration::from_secs(sec));
                } else {
                    self.keepalive_timeout = KeepaliveStatus::Infinite;
                }
            }
            None => {
                self.keepalive_timeout = KeepaliveStatus::Off;
            }
        }
    }

    pub fn get_keepalive_timeout(&self) -> Option<u64> {
        match self.keepalive_timeout {
            KeepaliveStatus::Timeout(d) => Some(d.as_secs()),
            KeepaliveStatus::Infinite => Some(0),
            KeepaliveStatus::Off => None,
        }
    }

    pub fn set_keepalive_reuses_remaining(&mut self, remaining: Option<u32>) {
        self.keepalive_reuses_remaining = remaining;
    }

    pub fn get_keepalive_reuses_remaining(&self) -> Option<u32> {
        self.keepalive_reuses_remaining
    }

    /// Return whether the session will be keepalived for connection reuse.
    pub fn will_keepalive(&self) -> bool {
        !matches!(
            (&self.keepalive_timeout, self.keepalive_reuses_remaining),
            (KeepaliveStatus::Off, _) | (_, Some(0))
        )
    }

    // `Keep-Alive: timeout=5, max=1000` => 5, 1000
    fn get_keepalive_values(&self) -> (Option<u64>, Option<usize>) {
        // TODO: implement this parsing
        (None, None)
    }

    fn ignore_info_resp(&self, status: u16) -> bool {
        // ignore informational response if ignore flag is set and it's not an Upgrade and Expect: 100-continue isn't set
        self.ignore_info_resp && status != 101 && !(status == 100 && self.is_expect_continue_req())
    }

    fn is_expect_continue_req(&self) -> bool {
        match self.request_header.as_deref() {
            Some(req) => is_expect_continue_req(req),
            None => false,
        }
    }

    fn is_connection_keepalive(&self) -> Option<bool> {
        is_buf_keepalive(self.get_header(header::CONNECTION))
    }

    // calculate write timeout from min_send_rate if set, otherwise return write_timeout
    fn write_timeout(&self, buf_len: usize) -> Option<Duration> {
        let Some(min_send_rate) = self.min_send_rate.filter(|r| *r > 0) else {
            return self.write_timeout;
        };

        // min timeout is 1s
        let ms = (buf_len.max(min_send_rate) as f64 / min_send_rate as f64) * 1000.0;
        // truncates unrealistically large values (we'll be out of memory before this happens)
        Some(Duration::from_millis(ms as u64))
    }

    /// Apply keepalive settings according to the client
    /// For HTTP 1.1, assume keepalive as long as there is no `Connection: Close` request header.
    /// For HTTP 1.0, only keepalive if there is an explicit header `Connection: keep-alive`.
    pub fn respect_keepalive(&mut self) {
        if let Some(keepalive) = self.is_connection_keepalive() {
            if keepalive {
                let (timeout, _max_use) = self.get_keepalive_values();
                // TODO: respect max_use
                match timeout {
                    Some(d) => self.set_keepalive(Some(d)),
                    None => self.set_keepalive(Some(0)), // infinite
                }
            } else {
                self.set_keepalive(None);
            }
        } else if self.req_header().version == Version::HTTP_11 {
            self.set_keepalive(Some(0)); // on by default for http 1.1
        } else {
            self.set_keepalive(None); // off by default for http 1.0
        }
    }

    fn init_body_writer(&mut self, header: &ResponseHeader) {
        use http::StatusCode;
        /* the following responses don't have body 204, 304, and HEAD */
        if matches!(
            header.status,
            StatusCode::NO_CONTENT | StatusCode::NOT_MODIFIED
        ) || self.get_method() == Some(&Method::HEAD)
        {
            self.body_writer.init_content_length(0);
            return;
        }

        if header.status.is_informational() && header.status != StatusCode::SWITCHING_PROTOCOLS {
            // 1xx response, not enough to init body
            return;
        }

        if self.is_upgrade(header) == Some(true) {
            self.body_writer.init_close_delimited();
        } else {
            init_body_writer_comm(&mut self.body_writer, &header.headers);
        }
    }

    /// Same as [`Self::write_response_header()`] but takes a reference.
    pub async fn write_response_header_ref(&mut self, resp: &ResponseHeader) -> Result<()> {
        self.write_response_header(Box::new(resp.clone())).await
    }

    async fn do_write_body(&mut self, buf: &[u8]) -> Result<Option<usize>> {
        let written = self
            .body_writer
            .write_body(&mut self.underlying_stream, buf)
            .await;

        if let Ok(Some(num_bytes)) = written {
            self.body_bytes_sent += num_bytes;
        }

        written
    }

    /// Write response body to the client. Return `Ok(None)` when there shouldn't be more body
    /// to be written, e.g., writing more bytes than what the `Content-Length` header suggests
    pub async fn write_body(&mut self, buf: &[u8]) -> Result<Option<usize>> {
        // TODO: check if the response header is written
        match self.write_timeout(buf.len()) {
            Some(t) => match timeout(t, self.do_write_body(buf)).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(WriteTimedout, format!("writing body, timeout: {t:?}")),
            },
            None => self.do_write_body(buf).await,
        }
    }

    async fn do_write_body_buf(&mut self) -> Result<Option<usize>> {
        // Don't flush empty chunks, they are considered end of body for chunks
        if self.body_write_buf.is_empty() {
            return Ok(None);
        }

        let written = self
            .body_writer
            .write_body(&mut self.underlying_stream, &self.body_write_buf)
            .await;

        if let Ok(Some(num_bytes)) = written {
            self.body_bytes_sent += num_bytes;
        }

        // make sure this buf is safe to reuse
        self.body_write_buf.clear();

        written
    }

    async fn write_body_buf(&mut self) -> Result<Option<usize>> {
        match self.write_timeout(self.body_write_buf.len()) {
            Some(t) => match timeout(t, self.do_write_body_buf()).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(WriteTimedout, format!("writing body, timeout: {t:?}")),
            },
            None => self.do_write_body_buf().await,
        }
    }

    fn maybe_force_close_body_reader(&mut self) {
        if self.upgraded && !self.body_reader.body_done() {
            // response is done, reset the request body to close
            self.body_reader.init_content_length(0, b"");
        }
    }

    /// Signal that there is no more body to write.
    /// This call will try to flush the buffer if there is any un-flushed data.
    /// For chunked encoding response, this call will also send the last chunk.
    /// For upgraded sessions, this call will also close the reading of the client body.
    pub async fn finish_body(&mut self) -> Result<Option<usize>> {
        let res = self.body_writer.finish(&mut self.underlying_stream).await?;
        self.underlying_stream
            .flush()
            .await
            .or_err(WriteError, "flushing body")?;

        trace!(
            "finish body (response body writer), upgraded: {}",
            self.upgraded
        );
        self.maybe_force_close_body_reader();
        Ok(res)
    }

    /// Return how many response body bytes (application, not wire) already sent downstream
    pub fn body_bytes_sent(&self) -> usize {
        self.body_bytes_sent
    }

    /// Return how many request body bytes (application, not wire) already read from downstream
    pub fn body_bytes_read(&self) -> usize {
        self.body_bytes_read
    }

    fn is_chunked_encoding(&self) -> bool {
        is_chunked_encoding_from_headers(&self.req_header().headers)
    }

    fn get_content_length(&self) -> Result<Option<usize>> {
        buf_to_content_length(
            self.get_header(header::CONTENT_LENGTH)
                .map(|v| v.as_bytes()),
        )
    }

    fn init_body_reader(&mut self) {
        if self.body_reader.need_init() {
            // reset retry buffer
            if let Some(buffer) = self.retry_buffer.as_mut() {
                buffer.clear();
            }

            // follow https://datatracker.ietf.org/doc/html/rfc9112#section-6.3
            let preread_body = self.preread_body.as_ref().unwrap().get(&self.buf[..]);

            if self.was_upgraded() {
                // if upgraded _post_ 101 (and body was not init yet)
                // treat as upgraded body (pass through until closed)
                self.body_reader.init_close_delimited(preread_body);
            } else if self.is_chunked_encoding() {
                // if chunked encoding, content-length should be ignored
                self.body_reader.init_chunked(preread_body);
            } else {
                // At this point, validate_request() should have already been called,
                // so get_content_length() should not return an error for invalid values
                let cl = self.get_content_length().unwrap_or(None);
                match cl {
                    Some(i) => {
                        self.body_reader.init_content_length(i, preread_body);
                    }
                    None => {
                        // https://datatracker.ietf.org/doc/html/rfc9112#section-6.3
                        // "Request messages are never close-delimited because they are
                        // always explicitly framed by length or transfer coding, with the absence of
                        // both implying the request ends immediately after the header section."
                        self.body_reader.init_content_length(0, preread_body);
                    }
                }
            }
        }
    }

    pub fn retry_buffer_truncated(&self) -> bool {
        self.retry_buffer
            .as_ref()
            .map_or_else(|| false, |r| r.is_truncated())
    }

    pub fn enable_retry_buffering(&mut self) {
        if self.retry_buffer.is_none() {
            self.retry_buffer = Some(FixedBuffer::new(BODY_BUF_LIMIT))
        }
    }

    pub fn get_retry_buffer(&self) -> Option<Bytes> {
        self.retry_buffer.as_ref().and_then(|b| {
            if b.is_truncated() {
                None
            } else {
                b.get_buffer()
            }
        })
    }

    fn get_body(&self, buf_ref: &BufRef) -> &[u8] {
        // TODO: these get_*() could panic. handle them better
        self.body_reader.get_body(buf_ref)
    }

    /// This function will (async) block forever until the client closes the connection.
    pub async fn idle(&mut self) -> Result<usize> {
        // NOTE: this implementation breaks http pipelining, ideally we need poll_error
        // NOTE: buf cannot be empty, openssl-rs read() requires none empty buf.
        let mut buf: [u8; 1] = [0; 1];
        self.underlying_stream
            .read(&mut buf)
            .await
            .or_err(ReadError, "during HTTP idle state")
    }

    /// This function will return body bytes (same as [`Self::read_body_bytes()`]), but after
    /// the client body finishes (`Ok(None)` is returned), calling this function again will block
    /// forever, same as [`Self::idle()`].
    pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result<Option<Bytes>> {
        if no_body_expected || self.is_body_done() {
            // XXX: account for upgraded body reader change, if the read half split from the write half
            let read = self.idle().await?;
            if read == 0 {
                Error::e_explain(
                    ConnectionClosed,
                    if self.response_written.is_none() {
                        "Prematurely before response header is sent"
                    } else {
                        "Prematurely before response body is complete"
                    },
                )
            } else {
                Error::e_explain(ConnectError, "Sent data after end of body")
            }
        } else {
            self.read_body_bytes().await
        }
    }

    /// Return the raw bytes of the request header.
    pub fn get_headers_raw_bytes(&self) -> Bytes {
        self.raw_header.as_ref().unwrap().get_bytes(&self.buf)
    }

    /// Close the connection abruptly. This allows to signal the client that the connection is closed
    /// before dropping [`HttpSession`]
    pub async fn shutdown(&mut self) {
        let _ = self.underlying_stream.shutdown().await;
    }

    /// Set the server keepalive timeout.
    /// `None`: disable keepalive, this session cannot be reused.
    /// `Some(0)`: reusing this session is allowed and there is no timeout.
    /// `Some(>0)`: reusing this session is allowed within the given timeout in seconds.
    /// If the client disallows connection reuse, then `keepalive` will be ignored.
    pub fn set_server_keepalive(&mut self, keepalive: Option<u64>) {
        if let Some(false) = self.is_connection_keepalive() {
            // connection: close is set
            self.set_keepalive(None);
        } else {
            self.set_keepalive(keepalive);
        }
    }

    /// Sets the downstream read timeout. This will trigger if we're unable
    /// to read from the stream after `timeout`.
    pub fn set_read_timeout(&mut self, timeout: Option<Duration>) {
        self.read_timeout = timeout;
    }

    /// Gets the downstream read timeout.
    pub fn get_read_timeout(&self) -> Option<Duration> {
        self.read_timeout
    }

    /// Sets the downstream write timeout. This will trigger if we're unable
    /// to write to the stream after `timeout`. If a `min_send_rate` is
    /// configured then the `min_send_rate` calculated timeout has higher priority.
    pub fn set_write_timeout(&mut self, timeout: Option<Duration>) {
        self.write_timeout = timeout;
    }

    /// Gets the downstream write timeout.
    pub fn get_write_timeout(&self) -> Option<Duration> {
        self.write_timeout
    }

    /// Sets the total drain timeout. For HTTP/1.1, reusing a session requires
    /// ensuring that the request body is consumed. This `timeout` will be used
    /// to determine how long to wait for the entirety of the downstream request
    /// body to finish after the upstream response is completed to return the
    /// session to the reuse pool. If the timeout is exceeded, we will give up
    /// on trying to reuse the session.
    ///
    /// Note that the downstream read timeout still applies between body byte reads.
    pub fn set_total_drain_timeout(&mut self, timeout: Option<Duration>) {
        self.total_drain_timeout = timeout;
    }

    /// Get the total drain timeout.
    pub fn get_total_drain_timeout(&self) -> Option<Duration> {
        self.total_drain_timeout
    }

    /// Sets the minimum downstream send rate in bytes per second. This
    /// is used to calculate a write timeout in seconds based on the size
    /// of the buffer being written. If a `min_send_rate` is configured it
    /// has higher priority over a set `write_timeout`. The minimum send
    /// rate must be greater than zero.
    ///
    /// Calculated write timeout is guaranteed to be at least 1s if `min_send_rate`
    /// is greater than zero, a send rate of zero is equivalent to disabling.
    pub fn set_min_send_rate(&mut self, min_send_rate: Option<usize>) {
        if let Some(rate) = min_send_rate.filter(|r| *r > 0) {
            self.min_send_rate = Some(rate);
        } else {
            self.min_send_rate = None;
        }
    }

    /// Sets whether we ignore writing informational responses downstream.
    ///
    /// This is a noop if the response is Upgrade or Continue and
    /// Expect: 100-continue was set on the request.
    pub fn set_ignore_info_resp(&mut self, ignore: bool) {
        self.ignore_info_resp = ignore;
    }

    /// Sets whether keepalive should be disabled if response is written prior to
    /// downstream body finishing.
    ///
    /// This may be set to avoid draining downstream if the body is no longer necessary.
    pub fn set_close_on_response_before_downstream_finish(&mut self, close: bool) {
        self.close_on_response_before_downstream_finish = close;
    }

    /// Return the [Digest] of the connection.
    pub fn digest(&self) -> &Digest {
        &self.digest
    }

    /// Return a mutable [Digest] reference for the connection.
    pub fn digest_mut(&mut self) -> &mut Digest {
        &mut self.digest
    }

    /// Return the client (peer) address of the underlying connection.
    pub fn client_addr(&self) -> Option<&SocketAddr> {
        self.digest()
            .socket_digest
            .as_ref()
            .map(|d| d.peer_addr())?
    }

    /// Return the server (local) address of the underlying connection.
    pub fn server_addr(&self) -> Option<&SocketAddr> {
        self.digest()
            .socket_digest
            .as_ref()
            .map(|d| d.local_addr())?
    }

    /// Consume `self`, if the connection can be reused, the underlying stream will be returned
    /// to be fed to the next [`Self::new()`]. This drains any remaining request body if it hasn't
    /// yet been read and the stream is reusable.
    ///
    /// The next session can just call [`Self::read_request()`].
    ///
    /// If the connection cannot be reused, the underlying stream will be closed and `None` will be
    /// returned. If there was an error while draining any remaining request body that error will
    /// be returned.
    pub async fn reuse(mut self) -> Result<Option<Stream>> {
        if !self.will_keepalive() {
            debug!("HTTP shutdown connection");
            self.shutdown().await;
            Ok(None)
        } else {
            self.drain_request_body().await?;
            // XXX: currently pipelined requests are not properly read without
            // pipelining support, and pingora 400s if pipelined requests are sent
            // in the middle of another request.
            // We will mark the connection as un-reusable so it may be closed,
            // the pipelined request left unread, and the client can attempt to resend
            if self.body_reader.has_bytes_overread() {
                debug!("bytes overread on request, disallowing reuse");
                Ok(None)
            } else {
                Ok(Some(self.underlying_stream))
            }
        }
    }

    /// Write a `100 Continue` response to the client.
    pub async fn write_continue_response(&mut self) -> Result<()> {
        // only send if we haven't already
        if self.response_written.is_none() {
            // size hint Some(0) because default is 8
            return self
                .write_response_header(Box::new(ResponseHeader::build(100, Some(0)).unwrap()))
                .await;
        }
        Ok(())
    }

    async fn write_non_empty_body(&mut self, data: Option<Bytes>, upgraded: bool) -> Result<()> {
        // Both upstream and downstream should agree on upgrade status.
        // Upgrade can only occur if both downstream and upstream sessions are H1.1
        // and see a 101 response, which logically MUST have been received
        // prior to this task.
        if upgraded != self.upgraded {
            if upgraded {
                panic!("Unexpected UpgradedBody task received on un-upgraded downstream session");
            } else {
                panic!("Unexpected Body task received on upgraded downstream session");
            }
        }
        let Some(d) = data else {
            return Ok(());
        };
        if d.is_empty() {
            return Ok(());
        }
        self.write_body(&d).await.map_err(|e| e.into_down())?;
        Ok(())
    }

    async fn response_duplex(&mut self, task: HttpTask) -> Result<bool> {
        let end_stream = match task {
            HttpTask::Header(header, end_stream) => {
                self.write_response_header(header)
                    .await
                    .map_err(|e| e.into_down())?;
                end_stream
            }
            HttpTask::Body(data, end_stream) => {
                self.write_non_empty_body(data, false).await?;
                end_stream
            }
            HttpTask::UpgradedBody(data, end_stream) => {
                self.write_non_empty_body(data, true).await?;
                end_stream
            }
            HttpTask::Trailer(_) => true, // h1 trailer is not supported yet
            HttpTask::Done => true,
            HttpTask::Failed(e) => return Err(e),
        };
        if end_stream {
            // no-op if body wasn't initialized or is finished already
            self.finish_body().await.map_err(|e| e.into_down())?;
        }
        Ok(end_stream || self.body_writer.finished())
    }

    fn buffer_body_data(&mut self, data: Option<Bytes>, upgraded: bool) {
        if upgraded != self.upgraded {
            if upgraded {
                panic!("Unexpected Body task received on upgraded downstream session");
            } else {
                panic!("Unexpected UpgradedBody task received on un-upgraded downstream session");
            }
        }

        let Some(d) = data else {
            return;
        };
        if !d.is_empty() && !self.body_writer.finished() {
            self.body_write_buf.put_slice(&d);
        }
    }

    // TODO: use vectored write to avoid copying
    pub async fn response_duplex_vec(&mut self, mut tasks: Vec<HttpTask>) -> Result<bool> {
        let n_tasks = tasks.len();
        if n_tasks == 1 {
            // fallback to single operation to avoid copy
            return self.response_duplex(tasks.pop().unwrap()).await;
        }

        let mut end_stream = false;
        for task in tasks.into_iter() {
            end_stream = match task {
                HttpTask::Header(header, end_stream) => {
                    self.write_response_header(header)
                        .await
                        .map_err(|e| e.into_down())?;
                    end_stream
                }
                HttpTask::Body(data, end_stream) => {
                    self.buffer_body_data(data, false);
                    end_stream
                }
                HttpTask::UpgradedBody(data, end_stream) => {
                    self.buffer_body_data(data, true);
                    end_stream
                }
                HttpTask::Trailer(_) => true, // h1 trailer is not supported yet
                HttpTask::Done => true,
                HttpTask::Failed(e) => {
                    // flush the data we have and quit
                    self.write_body_buf().await.map_err(|e| e.into_down())?;
                    self.underlying_stream
                        .flush()
                        .await
                        .or_err(WriteError, "flushing response")?;
                    return Err(e);
                }
            }
        }
        self.write_body_buf().await.map_err(|e| e.into_down())?;
        if end_stream {
            // no-op if body wasn't initialized or is finished already
            self.finish_body().await.map_err(|e| e.into_down())?;
        }
        Ok(end_stream || self.body_writer.finished())
    }

    /// Get the reference of the [Stream] that this HTTP session is operating upon.
    pub fn stream(&self) -> &Stream {
        &self.underlying_stream
    }

    /// Consume `self`, the underlying stream will be returned and can be used
    /// directly, for example, in the case of HTTP upgrade. The stream is not
    /// flushed prior to being returned.
    pub fn into_inner(self) -> Stream {
        self.underlying_stream
    }
}

// Regex to parse request line that has illegal chars in it
static REQUEST_LINE_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^\w+ (?P<uri>.+) HTTP/\d(?:\.\d)?").unwrap());

// the chars httparse considers illegal in URL
// Almost https://url.spec.whatwg.org/#query-percent-encode-set + {}
const URI_ESC_CHARSET: &AsciiSet = &CONTROLS.add(b' ').add(b'<').add(b'>').add(b'"');

fn escape_illegal_request_line(buf: &BytesMut) -> Option<BytesMut> {
    if let Some(captures) = REQUEST_LINE_REGEX.captures(buf) {
        // return if nothing matches: not a request line at all
        let uri = captures.name("uri")?;

        let escaped_uri = percent_encode(uri.as_bytes(), URI_ESC_CHARSET);

        // rebuild the entire request buf in a new buffer
        // TODO: this might be able to be done in place

        // need to be slightly bigger than the current buf;
        let mut new_buf = BytesMut::with_capacity(buf.len() + 32);
        new_buf.extend_from_slice(&buf[..uri.start()]);

        for s in escaped_uri {
            new_buf.extend_from_slice(s.as_bytes());
        }

        if new_buf.len() == uri.end() {
            // buf unchanged, nothing is escaped, return None to avoid loop
            return None;
        }

        new_buf.extend_from_slice(&buf[uri.end()..]);

        Some(new_buf)
    } else {
        None
    }
}

#[inline]
fn parse_req_buffer<'buf>(
    req: &mut httparse::Request<'_, 'buf>,
    buf: &'buf [u8],
) -> HeaderParseState {
    use httparse::Result;

    #[cfg(feature = "patched_http1")]
    fn parse<'buf>(req: &mut httparse::Request<'_, 'buf>, buf: &'buf [u8]) -> Result<usize> {
        req.parse_unchecked(buf)
    }

    #[cfg(not(feature = "patched_http1"))]
    fn parse<'buf>(req: &mut httparse::Request<'_, 'buf>, buf: &'buf [u8]) -> Result<usize> {
        req.parse(buf)
    }

    let res = match parse(req, buf) {
        Ok(s) => s,
        Err(e) => {
            return HeaderParseState::Invalid(e);
        }
    };
    match res {
        httparse::Status::Complete(s) => HeaderParseState::Complete(s),
        _ => HeaderParseState::Partial,
    }
}

#[inline]
fn http_resp_header_to_buf(
    resp: &ResponseHeader,
    buf: &mut BytesMut,
) -> std::result::Result<(), ()> {
    // Status-Line
    let version = match resp.version {
        Version::HTTP_09 => "HTTP/0.9 ",
        Version::HTTP_10 => "HTTP/1.0 ",
        Version::HTTP_11 => "HTTP/1.1 ",
        _ => {
            return Err(()); /*TODO: unsupported version */
        }
    };
    buf.put_slice(version.as_bytes());
    let status = resp.status;
    buf.put_slice(status.as_str().as_bytes());
    buf.put_u8(b' ');
    let reason = resp.get_reason_phrase();
    if let Some(reason_buf) = reason {
        buf.put_slice(reason_buf.as_bytes());
    }
    buf.put_slice(CRLF);

    // headers
    // TODO: style: make sure Server and Date headers are the first two
    resp.header_to_h1_wire(buf);

    buf.put_slice(CRLF);
    Ok(())
}

#[cfg(test)]
mod tests_stream {
    use super::*;
    use crate::protocols::http::v1::body::{BodyMode, ParseState};
    use http::StatusCode;
    use pingora_error::ErrorType;
    use rstest::rstest;
    use std::str;
    use tokio_test::io::Builder;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[tokio::test]
    async fn read_basic() {
        init_log();
        let input = b"GET / HTTP/1.1\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_request().await;
        assert_eq!(input.len(), res.unwrap().unwrap());
        assert_eq!(0, http_stream.req_header().headers.len());
    }

    #[cfg(feature = "patched_http1")]
    #[tokio::test]
    async fn read_invalid_path() {
        init_log();
        let input = b"GET /\x01\xF0\x90\x80 HTTP/1.1\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_request().await;
        assert_eq!(input.len(), res.unwrap().unwrap());
        assert_eq!(0, http_stream.req_header().headers.len());
        assert_eq!(b"/\x01\xF0\x90\x80", http_stream.get_path());
    }

    #[tokio::test]
    async fn read_2_buf() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\n\r\n";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_request().await;
        assert_eq!(input1.len() + input2.len(), res.unwrap().unwrap());
        assert_eq!(
            input1.len() + input2.len(),
            http_stream.raw_header.as_ref().unwrap().len()
        );
        assert_eq!(1, http_stream.req_header().headers.len());
        assert_eq!(Some(&Method::GET), http_stream.get_method());
        assert_eq!(b"/", http_stream.get_path());
        assert_eq!(Version::HTTP_11, http_stream.req_header().version);

        assert_eq!(b"pingora.org", http_stream.get_header_bytes("Host"));
    }

    #[tokio::test]
    async fn read_with_body_content_length() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nContent-Length: 3\r\n\r\n";
        let input3 = b"abc";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, input3.as_slice());
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));
        assert_eq!(http_stream.body_bytes_read(), 3);
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to read.")]
    async fn read_with_body_timeout() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nContent-Length: 3\r\n\r\n";
        let input3 = b"abc";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .wait(Duration::from_secs(2))
            .read(&input3[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_timeout = Some(Duration::from_secs(1));
        http_stream.read_request().await.unwrap();
        let res = http_stream.read_body_bytes().await;
        assert_eq!(http_stream.body_bytes_read(), 0);
        assert_eq!(res.unwrap_err().etype(), &ReadTimedout);
    }

    #[tokio::test]
    async fn read_with_body_content_length_single_read() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nContent-Length: 3\r\n\r\nabc";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, b"abc".as_slice());
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));
        assert_eq!(http_stream.body_bytes_read(), 3);
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to read.")]
    async fn read_with_body_http10() {
        init_log();
        let input1 = b"GET / HTTP/1.0\r\n";
        let input2 = b"Host: pingora.org\r\n\r\n";
        let input3 = b"a"; // This should NOT be read as body
        let input4 = b""; // simulating close - should also NOT be reached
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .read(&input4[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let res = http_stream.read_body_bytes().await.unwrap();
        assert!(res.is_none());
        assert_eq!(http_stream.body_bytes_read(), 0);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(0));
    }

    #[tokio::test]
    async fn read_with_body_http10_single_read() {
        init_log();
        // should have 0 body, even when data follows the headers
        let input1 = b"GET / HTTP/1.0\r\n";
        let input2 = b"Host: pingora.org\r\n\r\na";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let res = http_stream.read_body_bytes().await.unwrap();
        assert!(res.is_none());
        assert_eq!(http_stream.body_bytes_read(), 0);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(0));
        assert_eq!(http_stream.body_reader.get_body_overread().unwrap(), b"a");
    }

    #[tokio::test]
    async fn read_http11_default_no_body() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\n\r\n";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let res = http_stream.read_body_bytes().await.unwrap();
        assert!(res.is_none());
        assert_eq!(http_stream.body_bytes_read(), 0);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(0));
    }

    #[tokio::test]
    async fn read_http10_with_content_length() {
        init_log();
        let input1 = b"POST / HTTP/1.0\r\n";
        let input2 = b"Host: pingora.org\r\nContent-Length: 3\r\n\r\n";
        let input3 = b"abc";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, input3.as_slice());
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));
        assert_eq!(http_stream.body_bytes_read(), 3);
    }

    #[tokio::test]
    async fn read_with_body_chunked_0_incomplete() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nTransfer-Encoding: chunked\r\n\r\n";
        let input3 = b"0\r\n";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_chunked_encoding());
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, b"".as_slice());
        let e = http_stream.read_body_bytes().await.unwrap_err();
        assert_eq!(*e.etype(), ErrorType::ConnectionClosed);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Done(0));
    }

    #[tokio::test]
    async fn read_with_body_chunked_0_extra() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nTransfer-Encoding: chunked\r\n\r\n";
        let input3 = b"0\r\n";
        let input4 = b"abc";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .read(&input4[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_chunked_encoding());
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, b"".as_slice());
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, b"".as_slice());
        let e = http_stream.read_body_bytes().await.unwrap_err();
        assert_eq!(*e.etype(), ErrorType::ConnectionClosed);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Done(0));
    }

    #[tokio::test]
    async fn read_with_body_chunked_single_read() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nTransfer-Encoding: chunked\r\n\r\n1\r\na\r\n";
        let input3 = b"0\r\n\r\n";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_chunked_encoding());
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, b"a".as_slice());
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::Chunked(1, 0, 0, 0)
        );
        let res = http_stream.read_body_bytes().await.unwrap();
        assert!(res.is_none());
        assert_eq!(http_stream.body_bytes_read(), 1);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(1));
    }

    #[tokio::test]
    async fn read_with_body_chunked_single_read_extra() {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nTransfer-Encoding: chunked\r\n\r\n1\r\na\r\n";
        let input3 = b"0\r\n\r\nabc";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_chunked_encoding());
        let res = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(res, b"a".as_slice());
        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::Chunked(1, 0, 0, 0)
        );
        let res = http_stream.read_body_bytes().await.unwrap();
        assert!(res.is_none());
        assert_eq!(http_stream.body_bytes_read(), 1);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(1));
        assert_eq!(http_stream.body_reader.get_body_overread().unwrap(), b"abc");
    }

    #[rstest]
    #[case(None, None)]
    #[case(Some("transfer-encoding"), None)]
    #[case(Some("transfer-encoding"), Some("CONTENT-LENGTH"))]
    #[case(Some("TRANSFER-ENCODING"), Some("CONTENT-LENGTH"))]
    #[case(Some("TRANSFER-ENCODING"), None)]
    #[case(None, Some("CONTENT-LENGTH"))]
    #[case(Some("TRANSFER-ENCODING"), Some("content-length"))]
    #[case(None, Some("content-length"))]
    #[tokio::test]
    async fn transfer_encoding_and_content_length_disallowed(
        #[case] transfer_encoding_header: Option<&str>,
        #[case] content_length_header: Option<&str>,
    ) {
        init_log();
        let input1 = b"GET / HTTP/1.1\r\n";
        let mut input2 = "Host: pingora.org\r\n".to_owned();

        if let Some(transfer_encoding) = transfer_encoding_header {
            input2 += &format!("{transfer_encoding}: chunked\r\n");
        }
        if let Some(content_length) = content_length_header {
            input2 += &format!("{content_length}: 4\r\n")
        }

        input2 += "\r\n3e\r\na\r\n";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(input2.as_bytes())
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let _ = http_stream.read_request().await.unwrap();

        match (content_length_header, transfer_encoding_header) {
            (Some(_) | None, Some(_)) => {
                assert!(http_stream.get_header(TRANSFER_ENCODING).is_some());
                assert!(http_stream.get_header(CONTENT_LENGTH).is_none());
            }
            (Some(_), None) => {
                assert!(http_stream.get_header(TRANSFER_ENCODING).is_none());
                assert!(http_stream.get_header(CONTENT_LENGTH).is_some());
            }
            _ => {
                assert!(http_stream.get_header(CONTENT_LENGTH).is_none());
                assert!(http_stream.get_header(TRANSFER_ENCODING).is_none());
            }
        }
    }

    #[rstest]
    #[case::negative("-1")]
    #[case::not_a_number("abc")]
    #[case::float("1.5")]
    #[case::empty("")]
    #[case::spaces("  ")]
    #[case::mixed("123abc")]
    #[tokio::test]
    async fn validate_request_rejects_invalid_content_length(#[case] invalid_value: &str) {
        init_log();
        let input = format!(
            "POST / HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: {}\r\n\r\n",
            invalid_value
        );
        let mock_io = Builder::new().read(input.as_bytes()).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        // read_request calls validate_request internally, so it should fail here
        let res = http_stream.read_request().await;
        assert!(res.is_err());
        assert_eq!(res.unwrap_err().etype(), &InvalidHTTPHeader);
    }

    #[rstest]
    #[case::valid_zero("0")]
    #[case::valid_small("123")]
    #[case::valid_large("999999")]
    #[tokio::test]
    async fn validate_request_accepts_valid_content_length(#[case] valid_value: &str) {
        init_log();
        let input = format!(
            "POST / HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: {}\r\n\r\n",
            valid_value
        );
        let mock_io = Builder::new().read(input.as_bytes()).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_request().await;
        assert!(res.is_ok());
    }

    #[tokio::test]
    async fn validate_request_accepts_no_content_length() {
        init_log();
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_request().await;
        assert!(res.is_ok());
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to read.")]
    async fn read_invalid() {
        let input1 = b"GET / HTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\n\r\n";
        let mock_io = Builder::new().read(&input1[..]).read(&input2[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_request().await;
        assert_eq!(&InvalidHTTPHeader, res.unwrap_err().etype());
    }

    #[tokio::test]
    async fn read_invalid_header_end() {
        let input = b"POST / HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: 3\r\r\nConnection: keep-alive\r\n\r\nabc";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let res = http_stream.read_request().await;
        assert_eq!(&InvalidHTTPHeader, res.unwrap_err().etype());
    }

    async fn build_upgrade_req(upgrade: &str, conn: &str) -> HttpSession {
        let input = format!("GET / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: {upgrade}\r\nConnection: {conn}\r\n\r\n");
        let mock_io = Builder::new().read(input.as_bytes()).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        http_stream
    }

    #[tokio::test]
    async fn read_upgrade_req() {
        // http 1.0
        let input = b"GET / HTTP/1.0\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(!http_stream.is_upgrade_req());

        // different method
        let input = b"POST / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());

        // missing upgrade header
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nConnection: upgrade\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(!http_stream.is_upgrade_req());

        // no connection header
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: WebSocket\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());

        assert!(build_upgrade_req("websocket", "Upgrade")
            .await
            .is_upgrade_req());

        // mixed case
        assert!(build_upgrade_req("WebSocket", "Upgrade")
            .await
            .is_upgrade_req());
    }

    const POST_CL_UPGRADE_REQ: &[u8] = b"POST / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\nContent-Length: 10\r\n\r\n";
    const POST_BODY_DATA: &[u8] = b"abcdefghij";
    const POST_CHUNKED_UPGRADE_REQ: &[u8] = b"POST / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\nTransfer-Encoding: chunked\r\n\r\n";
    const POST_BODY_DATA_CHUNKED: &[u8] = b"3\r\nabc\r\n7\r\ndefghij\r\n0\r\n\r\n";

    #[rstest]
    #[case::content_length(POST_CL_UPGRADE_REQ, POST_BODY_DATA, POST_BODY_DATA)]
    #[case::chunked(POST_CHUNKED_UPGRADE_REQ, POST_BODY_DATA, POST_BODY_DATA_CHUNKED)]
    #[tokio::test]
    async fn read_upgrade_req_with_body(
        #[case] header: &[u8],
        #[case] body: &[u8],
        #[case] body_wire: &[u8],
    ) {
        let ws_data = b"data";
        let mock_io = Builder::new()
            .read(header)
            .read(body_wire)
            .write(b"HTTP/1.1 101 Switching Protocols\r\n\r\n")
            .read(&ws_data[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());
        // request has body
        assert!(!http_stream.is_body_done());

        let mut buf = vec![];
        while let Some(b) = http_stream.read_body_bytes().await.unwrap() {
            buf.put_slice(&b);
        }
        assert_eq!(buf, body);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(10));
        assert_eq!(http_stream.body_bytes_read(), 10);

        assert!(http_stream.is_body_done());

        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // body reader type switches
        assert!(!http_stream.is_body_done());

        // now the ws data
        let buf = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(buf, ws_data.as_slice());
        assert!(!http_stream.is_body_done());

        // EOF ends body
        assert!(http_stream.read_body_bytes().await.unwrap().is_none());
        assert!(http_stream.is_body_done());
    }

    #[rstest]
    #[case::content_length(POST_CL_UPGRADE_REQ, POST_BODY_DATA, POST_BODY_DATA)]
    #[case::chunked(POST_CHUNKED_UPGRADE_REQ, POST_BODY_DATA, POST_BODY_DATA_CHUNKED)]
    #[tokio::test]
    async fn read_upgrade_req_with_body_extra(
        #[case] header: &[u8],
        #[case] body: &[u8],
        #[case] body_wire: &[u8],
    ) {
        let ws_data = b"data";
        let data_wire = [body_wire, ws_data.as_slice()].concat();
        let mock_io = Builder::new()
            .read(header)
            .read(&data_wire[..])
            .write(b"HTTP/1.1 101 Switching Protocols\r\n\r\n")
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());
        // request has body
        assert!(!http_stream.is_body_done());

        let mut buf = vec![];
        while let Some(b) = http_stream.read_body_bytes().await.unwrap() {
            buf.put_slice(&b);
        }
        assert_eq!(buf, body);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(10));
        assert_eq!(http_stream.body_bytes_read(), 10);

        assert!(http_stream.is_body_done());

        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // body reader type switches
        assert!(!http_stream.is_body_done());

        // now the ws data
        let buf = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(buf, ws_data.as_slice());
        assert!(!http_stream.is_body_done());

        // EOF ends body
        assert!(http_stream.read_body_bytes().await.unwrap().is_none());
        assert!(http_stream.is_body_done());
    }

    #[rstest]
    #[case::content_length(POST_CL_UPGRADE_REQ, POST_BODY_DATA, POST_BODY_DATA)]
    #[case::chunked(POST_CHUNKED_UPGRADE_REQ, POST_BODY_DATA, POST_BODY_DATA_CHUNKED)]
    #[tokio::test]
    async fn read_upgrade_req_with_preread_body(
        #[case] header: &[u8],
        #[case] body: &[u8],
        #[case] body_wire: &[u8],
    ) {
        let ws_data = b"data";
        let data_wire = [header, body_wire, ws_data.as_slice()].concat();
        let mock_io = Builder::new()
            .read(&data_wire[..])
            .write(b"HTTP/1.1 101 Switching Protocols\r\n\r\n")
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());
        // request has body
        assert!(!http_stream.is_body_done());

        let mut buf = vec![];
        while let Some(b) = http_stream.read_body_bytes().await.unwrap() {
            buf.put_slice(&b);
        }
        assert_eq!(buf, body);
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(10));
        assert_eq!(http_stream.body_bytes_read(), 10);

        assert!(http_stream.is_body_done());

        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // body reader type switches
        assert!(!http_stream.is_body_done());

        // now the ws data
        let buf = http_stream.read_body_bytes().await.unwrap().unwrap();
        assert_eq!(buf, ws_data.as_slice());
        assert!(!http_stream.is_body_done());

        // EOF ends body
        assert!(http_stream.read_body_bytes().await.unwrap().is_none());
        assert!(http_stream.is_body_done());
    }

    #[rstest]
    #[case::content_length(POST_CL_UPGRADE_REQ, POST_BODY_DATA)]
    #[case::chunked(POST_CHUNKED_UPGRADE_REQ, POST_BODY_DATA_CHUNKED)]
    #[tokio::test]
    async fn read_upgrade_req_with_preread_body_after_101(
        #[case] header: &[u8],
        #[case] body_wire: &[u8],
    ) {
        let ws_data = b"data";
        let data_wire = [header, body_wire, ws_data.as_slice()].concat();
        let mock_io = Builder::new()
            .read(&data_wire[..])
            .write(b"HTTP/1.1 101 Switching Protocols\r\n\r\n")
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());
        // request has body
        assert!(!http_stream.is_body_done());

        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // body reader type switches to http10
        assert!(!http_stream.is_body_done());

        let mut buf = vec![];
        while let Some(b) = http_stream.read_body_bytes().await.unwrap() {
            buf.put_slice(&b);
        }
        let expected_body = [body_wire, ws_data.as_slice()].concat();
        assert_eq!(buf, expected_body.as_bytes());
        assert_eq!(http_stream.body_bytes_read(), expected_body.len());
        assert!(http_stream.is_body_done());
    }

    #[tokio::test]
    async fn read_upgrade_req_with_1xx_response() {
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\n\r\n";
        let mock_io = Builder::new()
            .read(&input[..])
            .write(b"HTTP/1.1 100 Continue\r\n\r\n")
            .write(b"HTTP/1.1 101 Switching Protocols\r\n\r\n")
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());
        let mut response = ResponseHeader::build(StatusCode::CONTINUE, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // 100 won't affect body state
        // current GET request is done
        assert!(http_stream.is_body_done());

        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();
        // body reader type switches
        assert!(!http_stream.is_body_done());
        // EOF ends body
        assert!(http_stream.read_body_bytes().await.unwrap().is_none());
        assert!(http_stream.is_body_done());
    }

    #[tokio::test]
    async fn test_upgrade_without_content_length_with_ws_data() {
        let request = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nUpgrade: websocket\r\nConnection: upgrade\r\n\r\n";
        let ws_data = b"websocket data";

        let mock_io = Builder::new()
            .read(request)
            .write(b"HTTP/1.1 101 Switching Protocols\r\n\r\n")
            .read(ws_data) // websocket data sent after 101
            .build();

        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_upgrade_req());

        // When enabled (default), is_body_done() is called before the upgrade
        http_stream.set_close_on_response_before_downstream_finish(false);

        // Send 101 response - this is where the bug occurs
        let mut response = ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response.set_version(http::Version::HTTP_11);
        http_stream
            .write_response_header(Box::new(response))
            .await
            .unwrap();

        assert_eq!(
            http_stream.body_reader.body_state,
            ParseState::UntilClose(0),
            "Body reader should be in UntilClose mode after 101 for upgraded connections"
        );

        // Try to read websocket data
        let mut buf = vec![];
        while let Some(b) = http_stream.read_body_bytes().await.unwrap() {
            buf.put_slice(&b);
        }
        assert_eq!(buf, ws_data, "Expected to read websocket data after 101");
    }

    #[tokio::test]
    async fn set_server_keepalive() {
        // close
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nConnection: close\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        // verify close
        assert_eq!(http_stream.keepalive_timeout, KeepaliveStatus::Off);
        http_stream.set_server_keepalive(Some(60));
        // verify no change on override
        assert_eq!(http_stream.keepalive_timeout, KeepaliveStatus::Off);

        // explicit keep-alive
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nConnection: keep-alive\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        // default is infinite for 1.1
        http_stream.read_request().await.unwrap();
        assert_eq!(http_stream.keepalive_timeout, KeepaliveStatus::Infinite);
        http_stream.set_server_keepalive(Some(60));
        // override respected
        assert_eq!(
            http_stream.keepalive_timeout,
            KeepaliveStatus::Timeout(Duration::from_secs(60))
        );

        // not specified
        let input = b"GET / HTTP/1.1\r\nHost: pingora.org\r\n\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        // default is infinite for 1.1
        assert_eq!(http_stream.keepalive_timeout, KeepaliveStatus::Infinite);
        http_stream.set_server_keepalive(Some(60));
        // override respected
        assert_eq!(
            http_stream.keepalive_timeout,
            KeepaliveStatus::Timeout(Duration::from_secs(60))
        );
    }

    #[tokio::test]
    async fn write() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let write_expected = b"HTTP/1.1 200 OK\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().read(read_wire).write(write_expected).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Foo", "Bar").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn write_custom_reason() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let write_expected = b"HTTP/1.1 200 Just Fine\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().read(read_wire).write(write_expected).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.set_reason_phrase(Some("Just Fine")).unwrap();
        new_response.append_header("Foo", "Bar").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn write_informational() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let write_expected = b"HTTP/1.1 100 Continue\r\n\r\nHTTP/1.1 200 OK\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().read(read_wire).write(write_expected).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let response_100 = ResponseHeader::build(StatusCode::CONTINUE, None).unwrap();
        http_stream
            .write_response_header_ref(&response_100)
            .await
            .unwrap();
        let mut response_200 = ResponseHeader::build(StatusCode::OK, None).unwrap();
        response_200.append_header("Foo", "Bar").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&response_200)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn write_informational_ignored() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let write_expected = b"HTTP/1.1 200 OK\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().read(read_wire).write(write_expected).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        // ignore the 100 Continue
        http_stream.ignore_info_resp = true;
        http_stream.read_request().await.unwrap();
        let response_100 = ResponseHeader::build(StatusCode::CONTINUE, None).unwrap();
        http_stream
            .write_response_header_ref(&response_100)
            .await
            .unwrap();
        let mut response_200 = ResponseHeader::build(StatusCode::OK, None).unwrap();
        response_200.append_header("Foo", "Bar").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&response_200)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn write_informational_100_not_ignored_if_expect_continue() {
        let input = b"GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n";
        let output = b"HTTP/1.1 100 Continue\r\n\r\nHTTP/1.1 200 OK\r\nFoo: Bar\r\n\r\n";

        let mock_io = Builder::new().read(&input[..]).write(output).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        http_stream.ignore_info_resp = true;
        // 100 Continue is not ignored due to Expect: 100-continue on request
        let response_100 = ResponseHeader::build(StatusCode::CONTINUE, None).unwrap();
        http_stream
            .write_response_header_ref(&response_100)
            .await
            .unwrap();
        let mut response_200 = ResponseHeader::build(StatusCode::OK, None).unwrap();
        response_200.append_header("Foo", "Bar").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&response_200)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn write_informational_1xx_ignored_if_expect_continue() {
        let input = b"GET / HTTP/1.1\r\nExpect: 100-continue\r\n\r\n";
        let output = b"HTTP/1.1 200 OK\r\nFoo: Bar\r\n\r\n";

        let mock_io = Builder::new().read(&input[..]).write(output).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        http_stream.ignore_info_resp = true;
        // 102 Processing is ignored
        let response_102 = ResponseHeader::build(StatusCode::PROCESSING, None).unwrap();
        http_stream
            .write_response_header_ref(&response_102)
            .await
            .unwrap();
        let mut response_200 = ResponseHeader::build(StatusCode::OK, None).unwrap();
        response_200.append_header("Foo", "Bar").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&response_200)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn write_101_switching_protocol() {
        let read_wire = b"GET / HTTP/1.1\r\nUpgrade: websocket\r\n\r\n";
        let wire = b"HTTP/1.1 101 Switching Protocols\r\nFoo: Bar\r\n\r\n";
        let wire_body = b"nPAYLOAD";
        let mock_io = Builder::new()
            .read(read_wire)
            .write(wire)
            .write(wire_body)
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let mut response_101 =
            ResponseHeader::build(StatusCode::SWITCHING_PROTOCOLS, None).unwrap();
        response_101.append_header("Foo", "Bar").unwrap();
        http_stream
            .write_response_header_ref(&response_101)
            .await
            .unwrap();
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(0));

        let n = http_stream.write_body(wire_body).await.unwrap().unwrap();
        assert_eq!(wire_body.len(), n);
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(n));

        // this write should be ignored
        let response_502 = ResponseHeader::build(StatusCode::BAD_GATEWAY, None).unwrap();
        http_stream
            .write_response_header_ref(&response_502)
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn write_body_cl() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let wire_header = b"HTTP/1.1 200 OK\r\nContent-Length: 1\r\n\r\n";
        let wire_body = b"a";
        let mock_io = Builder::new()
            .read(read_wire)
            .write(wire_header)
            .write(wire_body)
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Content-Length", "1").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ContentLength(1, 0)
        );
        let n = http_stream.write_body(wire_body).await.unwrap().unwrap();
        assert_eq!(wire_body.len(), n);
        let n = http_stream.finish_body().await.unwrap().unwrap();
        assert_eq!(wire_body.len(), n);
    }

    #[tokio::test]
    async fn write_body_http10() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let wire_header = b"HTTP/1.1 200 OK\r\n\r\n";
        let wire_body = b"a";
        let mock_io = Builder::new()
            .read(read_wire)
            .write(wire_header)
            .write(wire_body)
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(0));
        let n = http_stream.write_body(wire_body).await.unwrap().unwrap();
        assert_eq!(wire_body.len(), n);
        let n = http_stream.finish_body().await.unwrap().unwrap();
        assert_eq!(wire_body.len(), n);
    }

    #[tokio::test]
    async fn write_body_chunk() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let wire_header = b"HTTP/1.1 200 OK\r\nTransfer-Encoding: chunked\r\n\r\n";
        let wire_body = b"1\r\na\r\n";
        let wire_end = b"0\r\n\r\n";
        let mock_io = Builder::new()
            .read(read_wire)
            .write(wire_header)
            .write(wire_body)
            .write(wire_end)
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response
            .append_header("Transfer-Encoding", "chunked")
            .unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        assert_eq!(
            http_stream.body_writer.body_mode,
            BodyMode::ChunkedEncoding(0)
        );
        let n = http_stream.write_body(b"a").await.unwrap().unwrap();
        assert_eq!(b"a".len(), n);
        let n = http_stream.finish_body().await.unwrap().unwrap();
        assert_eq!(b"a".len(), n);
    }

    #[tokio::test]
    async fn read_with_illegal() {
        init_log();
        let input1 = b"GET /a?q=b c HTTP/1.1\r\n";
        let input2 = b"Host: pingora.org\r\nContent-Length: 3\r\n\r\n";
        let input3 = b"abc";
        let mock_io = Builder::new()
            .read(&input1[..])
            .read(&input2[..])
            .read(&input3[..])
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert_eq!(http_stream.get_path(), &b"/a?q=b%20c"[..]);
        let res = http_stream.read_body().await.unwrap().unwrap();
        assert_eq!(res, BufRef::new(0, 3));
        assert_eq!(http_stream.body_reader.body_state, ParseState::Complete(3));
        assert_eq!(input3, http_stream.get_body(&res));
    }

    #[test]
    fn escape_illegal() {
        init_log();
        // in query string
        let input = BytesMut::from(
            &b"GET /a?q=<\"b c\"> HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: 3\r\n\r\n"[..],
        );
        let output = escape_illegal_request_line(&input).unwrap();
        assert_eq!(
            &output,
            &b"GET /a?q=%3C%22b%20c%22%3E HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: 3\r\n\r\n"[..]
        );

        // in path
        let input = BytesMut::from(
            &b"GET /a:\"bc\" HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: 3\r\n\r\n"[..],
        );
        let output = escape_illegal_request_line(&input).unwrap();
        assert_eq!(
            &output,
            &b"GET /a:%22bc%22 HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: 3\r\n\r\n"[..]
        );

        // empty uri, unable to parse
        let input =
            BytesMut::from(&b"GET  HTTP/1.1\r\nHost: pingora.org\r\nContent-Length: 3\r\n\r\n"[..]);
        assert!(escape_illegal_request_line(&input).is_none());
    }

    #[tokio::test]
    async fn test_write_body_buf() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let write_expected = b"HTTP/1.1 200 OK\r\nFoo: Bar\r\n\r\n";
        let mock_io = Builder::new().read(read_wire).write(write_expected).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Foo", "Bar").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        let written = http_stream.write_body_buf().await.unwrap();
        assert!(written.is_none());
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to write.")]
    async fn test_write_body_buf_write_timeout() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let wire1 = b"HTTP/1.1 200 OK\r\nContent-Length: 3\r\n\r\n";
        let wire2 = b"abc";
        let mock_io = Builder::new()
            .read(read_wire)
            .write(wire1)
            .wait(Duration::from_millis(500))
            .write(wire2)
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        http_stream.write_timeout = Some(Duration::from_millis(100));
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Content-Length", "3").unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header_ref(&new_response)
            .await
            .unwrap();
        http_stream.body_write_buf = BytesMut::from(&b"abc"[..]);
        let res = http_stream.write_body_buf().await;
        assert_eq!(res.unwrap_err().etype(), &WriteTimedout);
    }

    #[tokio::test]
    async fn test_write_continue_resp() {
        let read_wire = b"GET / HTTP/1.1\r\n\r\n";
        let write_expected = b"HTTP/1.1 100 Continue\r\n\r\n";
        let mock_io = Builder::new().read(read_wire).write(write_expected).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        http_stream.write_continue_response().await.unwrap();
    }

    #[test]
    fn test_get_write_timeout() {
        let mut http_stream = HttpSession::new(Box::new(Builder::new().build()));
        let expected = Duration::from_secs(5);

        http_stream.set_write_timeout(Some(expected));
        assert_eq!(Some(expected), http_stream.write_timeout(50));
    }

    #[test]
    fn test_get_write_timeout_none() {
        let http_stream = HttpSession::new(Box::new(Builder::new().build()));
        assert!(http_stream.write_timeout(50).is_none());
    }

    #[test]
    fn test_get_write_timeout_min_send_rate_zero() {
        let mut http_stream = HttpSession::new(Box::new(Builder::new().build()));
        http_stream.set_min_send_rate(Some(0));
        assert!(http_stream.write_timeout(50).is_none());

        let mut http_stream = HttpSession::new(Box::new(Builder::new().build()));
        http_stream.set_min_send_rate(None);
        assert!(http_stream.write_timeout(50).is_none());
    }

    #[test]
    fn test_get_write_timeout_min_send_rate_overrides_write_timeout() {
        let mut http_stream = HttpSession::new(Box::new(Builder::new().build()));
        let expected = Duration::from_millis(29800);

        http_stream.set_write_timeout(Some(Duration::from_secs(60)));
        http_stream.set_min_send_rate(Some(5000));

        assert_eq!(Some(expected), http_stream.write_timeout(149000));
    }

    #[test]
    fn test_get_write_timeout_min_send_rate_max_zero_buf() {
        let mut http_stream = HttpSession::new(Box::new(Builder::new().build()));
        let expected = Duration::from_secs(1);

        http_stream.set_min_send_rate(Some(1));
        assert_eq!(Some(expected), http_stream.write_timeout(0));
    }

    #[tokio::test]
    async fn test_te_and_cl_disables_keepalive() {
        // When both Transfer-Encoding and Content-Length are present,
        // we must disable keepalive per RFC 9112 Section 6.1
        // https://datatracker.ietf.org/doc/html/rfc9112#section-6.1-15
        let input = b"POST / HTTP/1.1\r\n\
Host: pingora.org\r\n\
Transfer-Encoding: chunked\r\n\
Content-Length: 10\r\n\
\r\n\
5\r\n\
hello\r\n\
0\r\n\
\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();

        // Keepalive should be disabled
        assert_eq!(http_stream.keepalive_timeout, KeepaliveStatus::Off);

        // Content-Length header should have been removed
        assert!(!http_stream
            .req_header()
            .headers
            .contains_key(CONTENT_LENGTH));

        // Transfer-Encoding should still be present
        assert!(http_stream
            .req_header()
            .headers
            .contains_key(TRANSFER_ENCODING));
    }

    #[tokio::test]
    async fn test_http10_request_with_transfer_encoding_rejected() {
        // HTTP/1.0 requests MUST NOT contain Transfer-Encoding
        let input = b"POST / HTTP/1.0\r\n\
Host: pingora.org\r\n\
Transfer-Encoding: chunked\r\n\
\r\n\
5\r\n\
hello\r\n\
0\r\n\
\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let result = http_stream.read_request().await;

        // Should be rejected with InvalidHTTPHeader error
        assert!(result.is_err());
        let err = result.unwrap_err();
        assert_eq!(err.etype(), &InvalidHTTPHeader);
        assert!(err.to_string().contains("Transfer-Encoding"));
    }

    #[tokio::test]
    async fn test_http10_request_without_transfer_encoding_accepted() {
        // HTTP/1.0 requests without Transfer-Encoding should be accepted
        let input = b"POST / HTTP/1.0\r\n\
Host: pingora.org\r\n\
Content-Length: 5\r\n\
\r\n\
hello";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let result = http_stream.read_request().await;

        // Should succeed
        assert!(result.is_ok());
        assert_eq!(http_stream.req_header().version, http::Version::HTTP_10);
    }

    #[tokio::test]
    async fn test_http11_request_with_transfer_encoding_accepted() {
        // HTTP/1.1 with Transfer-Encoding should be accepted (contrast with HTTP/1.0)
        let input = b"POST / HTTP/1.1\r\n\
Host: pingora.org\r\n\
Transfer-Encoding: chunked\r\n\
\r\n\
5\r\n\
hello\r\n\
0\r\n\
\r\n";
        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        let result = http_stream.read_request().await;

        // Should succeed
        assert!(result.is_ok());
        assert_eq!(http_stream.req_header().version, http::Version::HTTP_11);
    }

    #[tokio::test]
    async fn test_request_multiple_transfer_encoding_headers() {
        init_log();
        // Multiple TE headers should be treated as comma-separated
        let input = b"POST / HTTP/1.1\r\n\
Host: pingora.org\r\n\
Transfer-Encoding: gzip\r\n\
Transfer-Encoding: chunked\r\n\
\r\n\
5\r\n\
hello\r\n\
0\r\n\
\r\n";

        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();

        // Should correctly identify chunked encoding from last header
        assert!(http_stream.is_chunked_encoding());

        // Verify body can be read correctly
        let body = http_stream.read_body_bytes().await.unwrap();
        assert_eq!(body.unwrap().as_ref(), b"hello");
    }

    #[tokio::test]
    async fn test_request_multiple_te_headers_chunked_not_last() {
        init_log();
        // Chunked in first header but not last - should NOT be chunked
        // Only the final Transfer-Encoding determines if body is chunked
        let input = b"POST / HTTP/1.1\r\n\
Host: pingora.org\r\n\
Transfer-Encoding: chunked\r\n\
Transfer-Encoding: identity\r\n\
Content-Length: 5\r\n\
\r\n";

        let mock_io = Builder::new().read(&input[..]).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        // should fail validation
        http_stream.read_request().await.unwrap_err();
    }

    #[tokio::test]
    async fn test_no_more_reuses_explicitly_disables_reuse() {
        init_log();
        let wire_req = b"GET /test HTTP/1.1\r\n\r\n";
        let wire_header = b"HTTP/1.1 200 OK\r\n\r\n";
        let mock_io = Builder::new()
            .read(&wire_req[..])
            .write(wire_header)
            .build();
        let mut http_session = HttpSession::new(Box::new(mock_io));

        // Setting the number of keepalive reuses here overrides the keepalive
        // setting below
        http_session.set_keepalive_reuses_remaining(Some(0));

        http_session.read_request().await.unwrap();

        let new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        http_session.update_resp_headers = false;
        http_session
            .write_response_header(Box::new(new_response))
            .await
            .unwrap();

        assert_eq!(http_session.body_writer.body_mode, BodyMode::UntilClose(0));

        http_session.finish_body().await.unwrap().unwrap();

        http_session.set_keepalive(Some(100));
        let reused = http_session.reuse().await.unwrap();
        assert!(reused.is_none());
    }

    #[tokio::test]
    async fn test_close_delimited_response_explicitly_disables_reuse() {
        init_log();
        let wire_req = b"GET /test HTTP/1.1\r\n\r\n";
        let wire_header = b"HTTP/1.1 200 OK\r\n\r\n";
        let mock_io = Builder::new()
            .read(&wire_req[..])
            .write(wire_header)
            .build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();

        let new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        http_stream.update_resp_headers = false;
        http_stream
            .write_response_header(Box::new(new_response))
            .await
            .unwrap();

        assert_eq!(http_stream.body_writer.body_mode, BodyMode::UntilClose(0));

        http_stream.finish_body().await.unwrap().unwrap();

        let reused = http_stream.reuse().await.unwrap();
        assert!(reused.is_none());
    }
}

#[cfg(test)]
mod test_sync {
    use super::*;
    use http::StatusCode;
    use log::{debug, error};
    use std::str;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[test]
    fn test_response_to_wire() {
        init_log();
        let mut new_response = ResponseHeader::build(StatusCode::OK, None).unwrap();
        new_response.append_header("Foo", "Bar").unwrap();
        let mut wire = BytesMut::with_capacity(INIT_HEADER_BUF_SIZE);
        http_resp_header_to_buf(&new_response, &mut wire).unwrap();
        debug!("{}", str::from_utf8(wire.as_ref()).unwrap());
        let mut headers = [httparse::EMPTY_HEADER; 128];
        let mut resp = httparse::Response::new(&mut headers);
        let result = resp.parse(wire.as_ref());
        match result {
            Ok(_) => {}
            Err(e) => error!("{:?}", e),
        }
        assert!(result.unwrap().is_complete());
        // FIXME: the order is not guaranteed
        assert_eq!(b"Foo", headers[0].name.as_bytes());
        assert_eq!(b"Bar", headers[0].value);
    }
}

#[cfg(test)]
mod test_timeouts {
    use super::*;
    use std::future::IntoFuture;
    use tokio_test::io::{Builder, Mock};

    /// An upper limit for any read within any test to prevent tests from hanging forever if
    /// an internal read call never returns, etc.
    const TEST_MAX_WAIT_FOR_READ: Duration = Duration::from_secs(3);

    /// The duration of 600 seconds is chosen to be "effectively forever" for the purpose of testing
    const TEST_FOREVER_DURATION: Duration = Duration::from_secs(600);

    /// The read_timeout to use, when we want to test that a read operation times out
    const TEST_READ_TIMEOUT: Duration = Duration::from_secs(1);

    #[derive(Debug)]
    struct ReadBlockedForeverError;

    /// Returns a client stream that will "never" send any bytes / return from a read operation
    fn mocked_blocking_headers_forever_stream() -> Box<Mock> {
        Box::new(Builder::new().wait(TEST_FOREVER_DURATION).build())
    }

    fn mocked_blocking_body_forever_stream() -> Box<Mock> {
        let http1 = b"GET / HTTP/1.1\r\n";
        let http2 = b"Host: pingora.example\r\nContent-Length: 3\r\n\r\n";
        Box::new(
            Builder::new()
                .read(&http1[..])
                .read(&http2[..])
                .wait(TEST_FOREVER_DURATION)
                .build(),
        )
    }

    /// Helper function to test a read operation with a tokio timeout
    /// to prevent tests from hanging forever in case of a bug
    async fn test_read_with_tokio_timeout<F, T>(
        read_future: F,
    ) -> Result<Result<T, Box<Error>>, ReadBlockedForeverError>
    where
        F: IntoFuture<Output = Result<T, Box<Error>>>,
    {
        let read_result = tokio::time::timeout(TEST_MAX_WAIT_FOR_READ, read_future).await;
        read_result.map_err(|_| ReadBlockedForeverError)
    }

    #[tokio::test]
    async fn test_read_http_request_headers_timeout_for_read_request() {
        // confirm that a `read_timeout` of `None` would've waited "indefinitely"
        let mut http_stream = HttpSession::new(mocked_blocking_headers_forever_stream());
        http_stream.read_timeout = None;
        let res = test_read_with_tokio_timeout(http_stream.read_request()).await;
        assert!(res.is_err()); // test timeout occurred, and not any internal Pingora timeout

        // confirm that the `read_timeout` is respected
        let mut http_stream = HttpSession::new(mocked_blocking_headers_forever_stream());
        http_stream.read_timeout = Some(TEST_READ_TIMEOUT);
        let res = test_read_with_tokio_timeout(http_stream.read_request()).await;
        assert!(res.is_ok());
        assert_eq!(res.unwrap().unwrap_err().etype(), &ReadTimedout);
    }

    #[tokio::test]
    async fn test_read_http_body_timeout_for_read_body_bytes() {
        // confirm that a `read_timeout` of `None` would've waited "indefinitely"
        let mut http_stream = HttpSession::new(mocked_blocking_body_forever_stream());
        http_stream.read_timeout = None;
        http_stream.read_request().await.unwrap();
        let res = test_read_with_tokio_timeout(http_stream.read_body_bytes()).await;
        assert!(res.is_err()); // test timeout occurred, and not any internal Pingora timeout

        // confirm that the `read_timeout` is respected
        let mut http_stream = HttpSession::new(mocked_blocking_body_forever_stream());
        http_stream.read_timeout = Some(TEST_READ_TIMEOUT);
        http_stream.read_request().await.unwrap();
        let res = test_read_with_tokio_timeout(http_stream.read_body_bytes()).await;
        assert!(res.is_ok());
        assert_eq!(res.unwrap().unwrap_err().etype(), &ReadTimedout);
    }
}

#[cfg(test)]
mod test_overread {
    use super::*;
    use rstest::rstest;
    use tokio_test::io::Builder;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    /// Test session reuse with preread body (all data in single read).
    /// When extra bytes are read beyond the request body, the session should NOT be reused.
    /// Test matrix includes whether reading body bytes is polled.
    #[rstest]
    #[case(0, None, true, true)] // CL:0, no extra, read body -> should reuse
    #[case(0, None, false, true)] // CL:0, no extra, no read -> should reuse
    #[case(0, Some(&b"extra_data_here"[..]), true, false)] // CL:0, extra, read body -> should NOT reuse
    #[case(0, Some(&b"extra_data_here"[..]), false, false)] // CL:0, extra, no read -> should NOT reuse
    #[case(5, None, true, true)] // CL:5, no extra, read body -> should reuse
    #[case(5, None, false, true)] // CL:5, no extra, no read -> should reuse
    #[case(5, Some(&b"extra"[..]), true, false)] // CL:5, extra, read body -> should NOT reuse
    #[case(5, Some(&b"extra"[..]), false, false)] // CL:5, extra, no read -> should NOT reuse
    #[tokio::test]
    async fn test_reuse_with_preread_body_overread(
        #[case] content_length: usize,
        #[case] extra_bytes: Option<&[u8]>,
        #[case] read_body: bool,
        #[case] expect_reuse: bool,
    ) {
        init_log();

        let body = b"hello";

        // Build the complete HTTP request in a single buffer
        // (all body is preread with header)
        let mut request_data = Vec::new();
        request_data.extend_from_slice(b"GET / HTTP/1.1\r\n");
        request_data.extend_from_slice(
            format!("Host: pingora.org\r\nContent-Length: {content_length}\r\n\r\n",).as_bytes(),
        );

        if content_length > 0 {
            request_data.extend_from_slice(&body[..content_length]);
        }

        if let Some(extra) = extra_bytes {
            request_data.extend_from_slice(extra);
        }

        let mock_io = Builder::new().read(&request_data).build();
        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();

        // Conditionally read the body
        if read_body {
            let result = http_stream.read_body_bytes().await.unwrap();

            if content_length == 0 {
                assert!(
                    result.is_none(),
                    "Body should be empty for Content-Length: 0"
                );
            } else {
                let body_result = result.unwrap();
                assert_eq!(body_result.as_ref(), &body[..content_length]);
            }
            assert_eq!(http_stream.body_bytes_read(), content_length);
        }

        let reused = http_stream.reuse().await.unwrap();
        assert_eq!(reused.is_some(), expect_reuse);
    }

    /// Test session reuse with chunked encoding and separate reads.
    /// When extra bytes are read beyond the request body, the session should NOT be reused.
    /// Test matrix includes whether reading body bytes is polled.
    #[rstest]
    #[case(true)]
    #[case(false)]
    #[tokio::test]
    async fn test_reuse_with_chunked_body_overread(#[case] read_body: bool) {
        init_log();

        let headers = b"GET / HTTP/1.1\r\nHost: pingora.org\r\nTransfer-Encoding: chunked\r\n\r\n";
        let body_and_extra = b"5\r\nhello\r\n0\r\n\r\nextra";

        let mock_io = Builder::new().read(headers).read(body_and_extra).build();

        let mut http_stream = HttpSession::new(Box::new(mock_io));
        http_stream.read_request().await.unwrap();
        assert!(http_stream.is_chunked_encoding());

        if read_body {
            let result = http_stream.read_body_bytes().await.unwrap();
            assert_eq!(result.unwrap().as_ref(), b"hello");

            // Read terminating chunk (returns None)
            let result = http_stream.read_body_bytes().await.unwrap();
            assert!(result.is_none());

            assert_eq!(http_stream.body_bytes_read(), 5);
        }

        let reused = http_stream.reuse().await.unwrap();
        assert!(reused.is_none());
    }
}


================================================
FILE: pingora-core/src/protocols/http/v2/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP/2 client session and connection
// TODO: this module needs a refactor

use bytes::Bytes;
use futures::FutureExt;
use h2::client::{self, ResponseFuture, SendRequest};
use h2::{Reason, RecvStream, SendStream};
use http::HeaderMap;
use log::{debug, error, warn};
use pingora_error::{Error, ErrorType, ErrorType::*, OrErr, Result, RetryType};
use pingora_http::{RequestHeader, ResponseHeader};
use pingora_timeout::timeout;
use std::io::ErrorKind;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::task::{ready, Context, Poll};
use std::time::Duration;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::sync::watch;

use crate::connectors::http::v2::ConnectionRef;
use crate::protocols::{Digest, SocketAddr, UniqueIDType};

pub const PING_TIMEDOUT: ErrorType = ErrorType::new("PingTimedout");

pub struct Http2Session {
    send_req: SendRequest<Bytes>,
    send_body: Option<SendStream<Bytes>>,
    resp_fut: Option<ResponseFuture>,
    req_sent: Option<Box<RequestHeader>>,
    response_header: Option<ResponseHeader>,
    response_body_reader: Option<RecvStream>,
    /// The read timeout, which will be applied to both reading the header and the body.
    /// The timeout is reset on every read. This is not a timeout on the overall duration of the
    /// response.
    pub read_timeout: Option<Duration>,
    /// The write timeout which will be applied to writing request body.
    /// The timeout is reset on every write. This is not a timeout on the overall duration of the
    /// request.
    pub write_timeout: Option<Duration>,
    pub conn: ConnectionRef,
    // Indicate that whether a END_STREAM is already sent
    ended: bool,
    // Total DATA payload bytes received from upstream response
    body_recv: usize,
}

impl Drop for Http2Session {
    fn drop(&mut self) {
        self.conn.release_stream();
    }
}

impl Http2Session {
    pub(crate) fn new(send_req: SendRequest<Bytes>, conn: ConnectionRef) -> Self {
        Http2Session {
            send_req,
            send_body: None,
            resp_fut: None,
            req_sent: None,
            response_header: None,
            response_body_reader: None,
            read_timeout: None,
            write_timeout: None,
            conn,
            ended: false,
            body_recv: 0,
        }
    }

    fn sanitize_request_header(req: &mut RequestHeader) -> Result<()> {
        req.set_version(http::Version::HTTP_2);
        if req.uri.authority().is_some() {
            return Ok(());
        }
        // use host header to populate :authority field
        let Some(authority) = req.headers.get(http::header::HOST).map(|v| v.as_bytes()) else {
            return Error::e_explain(InvalidHTTPHeader, "no authority header for h2");
        };
        let uri = http::uri::Builder::new()
            .scheme("https") // fixed for now
            .authority(authority)
            .path_and_query(req.uri.path_and_query().as_ref().unwrap().as_str())
            .build();
        match uri {
            Ok(uri) => {
                req.set_uri(uri);
                Ok(())
            }
            Err(_) => Error::e_explain(
                InvalidHTTPHeader,
                format!("invalid authority from host {authority:?}"),
            ),
        }
    }

    /// Write the request header to the server
    pub fn write_request_header(&mut self, mut req: Box<RequestHeader>, end: bool) -> Result<()> {
        if self.req_sent.is_some() {
            // cannot send again, TODO: warn
            return Ok(());
        }
        Self::sanitize_request_header(&mut req)?;
        let parts = req.as_owned_parts();
        let request = http::Request::from_parts(parts, ());
        // There is no write timeout for h2 because the actual write happens async from this fn
        let (resp_fut, send_body) = self
            .send_req
            .send_request(request, end)
            .or_err(H2Error, "while sending request")
            .map_err(|e| self.handle_err(e))?;
        self.req_sent = Some(req);
        self.send_body = Some(send_body);
        self.resp_fut = Some(resp_fut);
        self.ended = self.ended || end;

        Ok(())
    }

    /// Write a request body chunk
    pub async fn write_request_body(&mut self, data: Bytes, end: bool) -> Result<()> {
        if self.ended {
            warn!("Try to write request body after end of stream, dropping the extra data");
            return Ok(());
        }

        let body_writer = self
            .send_body
            .as_mut()
            .expect("Try to write request body before sending request header");

        super::write_body(body_writer, data, end, self.write_timeout)
            .await
            .map_err(|e| self.handle_err(e))?;
        self.ended = self.ended || end;
        Ok(())
    }

    /// Signal that the request body has ended
    pub fn finish_request_body(&mut self) -> Result<()> {
        if self.ended {
            return Ok(());
        }

        let body_writer = self
            .send_body
            .as_mut()
            .expect("Try to finish request stream before sending request header");

        // Just send an empty data frame with end of stream set
        body_writer
            .send_data("".into(), true)
            .or_err(WriteError, "while writing empty h2 request body")
            .map_err(|e| self.handle_err(e))?;
        self.ended = true;
        Ok(())
    }

    /// Read the response header
    pub async fn read_response_header(&mut self) -> Result<()> {
        // TODO: how to read 1xx headers?
        // https://github.com/hyperium/h2/issues/167

        if self.response_header.is_some() {
            panic!("H2 response header is already read")
        }

        let Some(resp_fut) = self.resp_fut.take() else {
            panic!("Try to take response header, but it is already taken")
        };

        let res = match self.read_timeout {
            Some(t) => timeout(t, resp_fut)
                .await
                .map_err(|_| Error::explain(ReadTimedout, "while reading h2 response header"))
                .map_err(|e| self.handle_err(e))?,
            None => resp_fut.await,
        };
        let (resp, body_reader) = res.map_err(handle_read_header_error)?.into_parts();
        self.response_header = Some(resp.into());
        self.response_body_reader = Some(body_reader);

        Ok(())
    }

    #[doc(hidden)]
    pub fn poll_read_response_header(
        &mut self,
        cx: &mut Context<'_>,
    ) -> Poll<Result<(), h2::Error>> {
        if self.response_header.is_some() {
            panic!("H2 response header is already read")
        }

        let Some(mut resp_fut) = self.resp_fut.take() else {
            panic!("Try to take response header, but it is already taken")
        };

        let res = match resp_fut.poll_unpin(cx) {
            Poll::Ready(Ok(res)) => res,
            Poll::Ready(Err(err)) => return Poll::Ready(Err(err)),
            Poll::Pending => {
                self.resp_fut = Some(resp_fut);
                return Poll::Pending;
            }
        };

        let (resp, body_reader) = res.into_parts();
        self.response_header = Some(resp.into());
        self.response_body_reader = Some(body_reader);

        Poll::Ready(Ok(()))
    }

    /// Read the response body
    ///
    /// `None` means, no more body to read
    pub async fn read_response_body(&mut self) -> Result<Option<Bytes>> {
        let Some(body_reader) = self.response_body_reader.as_mut() else {
            // req is not sent or response is already read
            // TODO: warn
            return Ok(None);
        };

        let fut = body_reader.data();
        let res = match self.read_timeout {
            Some(t) => timeout(t, fut)
                .await
                .map_err(|_| Error::explain(ReadTimedout, "while reading h2 response body"))?,
            None => fut.await,
        };
        let body = res
            .transpose()
            .or_err(ReadError, "while read h2 response body")
            .map_err(|mut e| {
                // cannot use handle_err() because of borrow checker
                if self.conn.ping_timedout() {
                    e.etype = PING_TIMEDOUT;
                }
                e
            })?;

        if let Some(data) = body.as_ref() {
            body_reader
                .flow_control()
                .release_capacity(data.len())
                .or_err(ReadError, "while releasing h2 response body capacity")?;
            self.body_recv = self.body_recv.saturating_add(data.len());
        }

        Ok(body)
    }

    #[doc(hidden)]
    pub fn poll_read_response_body(
        &mut self,
        cx: &mut Context<'_>,
    ) -> Poll<Option<Result<Bytes, h2::Error>>> {
        let Some(body_reader) = self.response_body_reader.as_mut() else {
            // req is not sent or response is already read
            // TODO: warn
            return Poll::Ready(None);
        };

        let data = match ready!(body_reader.poll_data(cx)).transpose() {
            Ok(data) => data,
            Err(err) => return Poll::Ready(Some(Err(err))),
        };

        if let Some(data) = data {
            body_reader.flow_control().release_capacity(data.len())?;
            return Poll::Ready(Some(Ok(data)));
        }

        Poll::Ready(None)
    }

    /// Whether the response has ended
    pub fn response_finished(&self) -> bool {
        // if response_body_reader doesn't exist, the response is not even read yet
        self.response_body_reader
            .as_ref()
            .is_some_and(|reader| reader.is_end_stream())
    }

    /// Check whether stream finished with error.
    /// Like `response_finished`, but also attempts to poll the h2 stream for errors that may have
    /// caused the stream to terminate, and returns them as `H2Error`s.
    pub fn check_response_end_or_error(&mut self) -> Result<bool> {
        let Some(reader) = self.response_body_reader.as_mut() else {
            // response is not even read
            return Ok(false);
        };

        if !reader.is_end_stream() {
            return Ok(false);
        }

        // https://github.com/hyperium/h2/issues/806
        // The fundamental issue is that h2::RecvStream may return `is_end_stream` true
        // when the stream was naturally closed via END_STREAM /OR/ if there was an error
        // while reading data frames that forced the closure.
        // The h2 API as-is makes it difficult to determine which situation is occurring.
        //
        // `poll_data` should be returning None after `is_end_stream`, if the stream
        // is truly expecting no more data to be sent.
        // https://docs.rs/h2/latest/h2/struct.RecvStream.html#method.is_end_stream
        // So poll the data once to check this condition. If an error is returned, that indicates
        // that the stream closed due to an error e.g. h2 protocol error.
        //
        // tokio::task::unconstrained because now_or_never may yield None when the future is ready
        match tokio::task::unconstrained(reader.data()).now_or_never() {
            Some(None) => Ok(true),
            Some(Some(Ok(_))) => Error::e_explain(H2Error, "unexpected data after end stream"),
            Some(Some(Err(e))) => Error::e_because(H2Error, "while checking end stream", e),
            None => {
                // RecvStream data() should be ready to poll after the stream ends,
                // this indicates an unexpected change in the h2 crate
                panic!("data() not ready after end stream")
            }
        }
    }

    /// Read the optional trailer headers
    pub async fn read_trailers(&mut self) -> Result<Option<HeaderMap>> {
        let Some(reader) = self.response_body_reader.as_mut() else {
            // response is not even read
            // TODO: warn
            return Ok(None);
        };
        let fut = reader.trailers();

        let res = match self.read_timeout {
            Some(t) => timeout(t, fut)
                .await
                .map_err(|_| Error::explain(ReadTimedout, "while reading h2 trailer"))
                .map_err(|e| self.handle_err(e))?,
            None => fut.await,
        };
        match res {
            Ok(t) => Ok(t),
            Err(e) => {
                // GOAWAY with no error: this is graceful shutdown, continue as if no trailer
                // RESET_STREAM with no error: https://datatracker.ietf.org/doc/html/rfc9113#section-8.1:
                // this is to signal client to stop uploading request without breaking the response.
                // TODO: should actually stop uploading
                // TODO: should we try reading again?
                // TODO: handle this when reading headers and body as well
                // https://github.com/hyperium/h2/issues/741

                if (e.is_go_away() || e.is_reset())
                    && e.is_remote()
                    && e.reason() == Some(Reason::NO_ERROR)
                {
                    Ok(None)
                } else {
                    Err(e)
                }
            }
        }
        .or_err(ReadError, "while reading h2 trailers")
    }

    /// The request header if it is already sent
    pub fn request_header(&self) -> Option<&RequestHeader> {
        self.req_sent.as_deref()
    }

    /// The response header if it is already read
    pub fn response_header(&self) -> Option<&ResponseHeader> {
        self.response_header.as_ref()
    }

    /// Give up the http session abruptly.
    pub fn shutdown(&mut self) {
        if !self.ended || !self.response_finished() {
            if let Some(send_body) = self.send_body.as_mut() {
                send_body.send_reset(h2::Reason::INTERNAL_ERROR)
            }
        }
    }

    /// Drop everything in this h2 stream. Return the connection ref.
    /// After this function the underlying h2 connection should already notify the closure of this
    /// stream so that another stream can be created if needed.
    pub(crate) fn conn(&self) -> ConnectionRef {
        self.conn.clone()
    }

    /// Whether ping timeout occurred. After a ping timeout, the h2 connection will be terminated.
    /// Ongoing h2 streams will receive an stream/connection error. The streams should check this
    /// flag to tell whether the error is triggered by the timeout.
    pub(crate) fn ping_timedout(&self) -> bool {
        self.conn.ping_timedout()
    }

    /// Return the [Digest] of the connection
    ///
    /// For reused connection, the timing in the digest will reflect its initial handshakes
    /// The caller should check if the connection is reused to avoid misuse the timing field.
    pub fn digest(&self) -> Option<&Digest> {
        Some(self.conn.digest())
    }

    /// Return a mutable [Digest] reference for the connection
    ///
    /// Will return `None` if multiple H2 streams are open.
    pub fn digest_mut(&mut self) -> Option<&mut Digest> {
        self.conn.digest_mut()
    }

    /// Return the server (peer) address recorded in the connection digest.
    pub fn server_addr(&self) -> Option<&SocketAddr> {
        self.conn
            .digest()
            .socket_digest
            .as_ref()
            .map(|d| d.peer_addr())?
    }

    /// Return the client (local) address recorded in the connection digest.
    pub fn client_addr(&self) -> Option<&SocketAddr> {
        self.conn
            .digest()
            .socket_digest
            .as_ref()
            .map(|d| d.local_addr())?
    }

    /// the FD of the underlying connection
    pub fn fd(&self) -> UniqueIDType {
        self.conn.id()
    }

    /// Upstream response body bytes received (HTTP/2 DATA payload; excludes headers/framing).
    pub fn body_bytes_received(&self) -> usize {
        self.body_recv
    }

    /// take the body sender to another task to perform duplex read and write
    pub fn take_request_body_writer(&mut self) -> Option<SendStream<Bytes>> {
        self.send_body.take()
    }

    fn handle_err(&self, mut e: Box<Error>) -> Box<Error> {
        if self.ping_timedout() {
            e.etype = PING_TIMEDOUT;
        }

        // is_go_away: retry via another connection, this connection is being teardown
        // should retry
        if self.response_header.is_none() {
            if let Some(err) = e.root_cause().downcast_ref::<h2::Error>() {
                if err.is_go_away()
                    && err.is_remote()
                    && (err.reason() == Some(h2::Reason::NO_ERROR))
                {
                    e.retry = true.into();
                }
            }
        }
        e
    }
}

/* helper functions */

/* Types of errors during h2 header read
 1. peer requests to downgrade to h1, mostly IIS server for NTLM: we will downgrade and retry
 2. peer sends invalid h2 frames, usually sending h1 only header: we will downgrade and retry
 3. peer sends GO_AWAY(NO_ERROR) connection is being shut down: we will retry
 4. peer IO error on reused conn, usually firewall kills old conn: we will retry
 5. peer sends REFUSED_STREAM on RST_STREAM, this is safe to retry
 6. All other errors will terminate the request
*/
fn handle_read_header_error(e: h2::Error) -> Box<Error> {
    if e.is_remote() && (e.reason() == Some(h2::Reason::HTTP_1_1_REQUIRED)) {
        let mut err = Error::because(H2Downgrade, "while reading h2 header", e);
        err.retry = true.into();
        err
    } else if e.is_go_away() && e.is_library() && (e.reason() == Some(h2::Reason::PROTOCOL_ERROR)) {
        // remote send invalid H2 responses
        let mut err = Error::because(InvalidH2, "while reading h2 header", e);
        err.retry = true.into();
        err
    } else if e.is_go_away() && e.is_remote() && (e.reason() == Some(h2::Reason::NO_ERROR)) {
        // is_go_away: retry via another connection, this connection is being teardown
        let mut err = Error::because(H2Error, "while reading h2 header", e);
        err.retry = true.into();
        err
    } else if e.is_reset() && e.is_remote() && (e.reason() == Some(h2::Reason::REFUSED_STREAM)) {
        // The REFUSED_STREAM error code can be included in a RST_STREAM frame to indicate
        // that the stream is being closed prior to any processing having occurred.
        // Any request that was sent on the reset stream can be safely retried.
        // https://datatracker.ietf.org/doc/html/rfc9113#section-8.7
        let mut err = Error::because(H2Error, "while reading h2 header", e);
        err.retry = true.into();
        err
    } else if e.is_io() {
        // is_io: typical if a previously reused connection silently drops it
        // only retry if the connection is reused
        // safety: e.get_io() will always succeed if e.is_io() is true
        let io_err = e.get_io().expect("checked is io");

        // for h2 hyperium raw_os_error() will be None unless this is a new connection
        // where we handshake() and from_io() is called, check ErrorKind explicitly with true_io_error
        let true_io_error = io_err.raw_os_error().is_some()
            || matches!(
                io_err.kind(),
                ErrorKind::ConnectionReset | ErrorKind::TimedOut | ErrorKind::BrokenPipe
            );
        let mut err = Error::because(ReadError, "while reading h2 header", e);
        if true_io_error {
            err.retry = RetryType::ReusedOnly;
        } // else could be TLS error, which is unsafe to retry
        err
    } else {
        Error::because(H2Error, "while reading h2 header", e)
    }
}

use tokio::sync::oneshot;

pub async fn drive_connection<S>(
    mut c: client::Connection<S>,
    id: UniqueIDType,
    closed: watch::Sender<bool>,
    ping_interval: Option<Duration>,
    ping_timeout_occurred: Arc<AtomicBool>,
) where
    S: AsyncRead + AsyncWrite + Send + Unpin,
{
    let interval = ping_interval.unwrap_or(Duration::ZERO);
    if !interval.is_zero() {
        // for ping to inform this fn to drop the connection
        let (tx, rx) = oneshot::channel::<()>();
        // for this fn to inform ping to give up when it is already dropped
        let dropped = Arc::new(AtomicBool::new(false));
        let dropped2 = dropped.clone();

        if let Some(ping_pong) = c.ping_pong() {
            pingora_runtime::current_handle().spawn(async move {
                do_ping_pong(ping_pong, interval, tx, dropped2, id).await;
            });
        } else {
            warn!("Cannot get ping-pong handler from h2 connection");
        }

        tokio::select! {
            r = c => match r {
                Ok(_) => debug!("H2 connection finished fd: {id}"),
                Err(e) => debug!("H2 connection fd: {id} errored: {e:?}"),
            },
            r = rx => match r {
                Ok(_) => {
                    ping_timeout_occurred.store(true, Ordering::Relaxed);
                    warn!("H2 connection Ping timeout/Error fd: {id}, closing conn");
                },
                Err(e) => warn!("H2 connection Ping Rx error {e:?}"),
            },
        };

        dropped.store(true, Ordering::Relaxed);
    } else {
        match c.await {
            Ok(_) => debug!("H2 connection finished fd: {id}"),
            Err(e) => debug!("H2 connection fd: {id} errored: {e:?}"),
        }
    }
    let _ = closed.send(true);
}

const PING_TIMEOUT: Duration = Duration::from_secs(5);

async fn do_ping_pong(
    mut ping_pong: h2::PingPong,
    interval: Duration,
    tx: oneshot::Sender<()>,
    dropped: Arc<AtomicBool>,
    id: UniqueIDType,
) {
    // delay before sending the first ping, no need to race with the first request
    tokio::time::sleep(interval).await;
    loop {
        if dropped.load(Ordering::Relaxed) {
            break;
        }
        let ping_fut = ping_pong.ping(h2::Ping::opaque());
        debug!("H2 fd: {id} ping sent");
        match tokio::time::timeout(PING_TIMEOUT, ping_fut).await {
            Err(_) => {
                error!("H2 fd: {id} ping timeout");
                let _ = tx.send(());
                break;
            }
            Ok(r) => match r {
                Ok(_) => {
                    debug!("H2 fd: {} pong received", id);
                    tokio::time::sleep(interval).await;
                }
                Err(e) => {
                    if dropped.load(Ordering::Relaxed) {
                        // drive_connection() exits first, no need to error again
                        break;
                    }
                    error!("H2 fd: {id} ping error: {e}");
                    let _ = tx.send(());
                    break;
                }
            },
        }
    }
}

#[cfg(test)]
mod tests_h2 {
    use super::*;
    use bytes::Bytes;
    use http::{Response, StatusCode};
    use tokio::io::duplex;

    #[tokio::test]
    async fn h2_body_bytes_received_multi_frames() {
        let (client_io, server_io) = duplex(65536);

        // Server: respond with two DATA frames "a" and "bc"
        tokio::spawn(async move {
            let mut conn = h2::server::handshake(server_io).await.unwrap();
            if let Some(result) = conn.accept().await {
                let (req, mut send_resp) = result.unwrap();
                assert_eq!(req.method(), http::Method::GET);
                let resp = Response::builder().status(StatusCode::OK).body(()).unwrap();
                let mut send_stream = send_resp.send_response(resp, false).unwrap();
                send_stream.send_data(Bytes::from("a"), false).unwrap();
                send_stream.send_data(Bytes::from("bc"), true).unwrap();
                // Signal graceful shutdown so the accept loop can exit after the client finishes
                conn.graceful_shutdown();
            }
            // Drive the server connection until the client closes
            while let Some(_res) = conn.accept().await {}
        });

        // Client: build Http2Session and read response
        let (send_req, connection) = h2::client::handshake(client_io).await.unwrap();
        let (closed_tx, closed_rx) = tokio::sync::watch::channel(false);
        let ping_timeout = Arc::new(AtomicBool::new(false));
        tokio::spawn(async move {
            let _ = connection.await;
            let _ = closed_tx.send(true);
        });

        let digest = Digest::default();
        let conn_ref = crate::connectors::http::v2::ConnectionRef::new(
            send_req.clone(),
            closed_rx,
            ping_timeout,
            0,
            1,
            digest,
        );
        let mut h2s = Http2Session::new(send_req, conn_ref);

        // minimal request
        let mut req = RequestHeader::build("GET", b"/", None).unwrap();
        req.insert_header(http::header::HOST, "example.com")
            .unwrap();
        h2s.write_request_header(Box::new(req), true).unwrap();
        h2s.read_response_header().await.unwrap();

        let mut total = 0;
        while let Some(chunk) = h2s.read_response_body().await.unwrap() {
            total += chunk.len();
        }
        assert_eq!(total, 3);
        assert_eq!(h2s.body_bytes_received(), 3);
    }
}


================================================
FILE: pingora-core/src/protocols/http/v2/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP/2 implementation

use std::time::Duration;

use crate::{Error, ErrorType::*, OrErr, Result};
use pingora_timeout::timeout;

use bytes::Bytes;
use h2::SendStream;

pub mod client;
pub mod server;

async fn reserve_and_send(
    writer: &mut SendStream<Bytes>,
    remaining: &mut Bytes,
    end: bool,
) -> Result<()> {
    // reserve remaining bytes then wait
    writer.reserve_capacity(remaining.len());
    let res = std::future::poll_fn(|cx| writer.poll_capacity(cx)).await;

    match res {
        None => Error::e_explain(H2Error, "cannot reserve capacity"),
        Some(ready) => {
            let n = ready.or_err(H2Error, "while waiting for capacity")?;
            let remaining_size = remaining.len();
            let data_to_send = remaining.split_to(std::cmp::min(remaining_size, n));
            writer
                .send_data(data_to_send, remaining.is_empty() && end)
                .or_err(WriteError, "while writing h2 request body")?;
            Ok(())
        }
    }
}

/// A helper function to write the body of h2 streams.
pub async fn write_body(
    writer: &mut SendStream<Bytes>,
    data: Bytes,
    end: bool,
    write_timeout: Option<Duration>,
) -> Result<()> {
    let mut remaining = data;

    // Cannot poll 0 capacity, so send it directly.
    if remaining.is_empty() {
        writer
            .send_data(remaining, end)
            .or_err(WriteError, "while writing h2 request body")?;
        return Ok(());
    }

    loop {
        match write_timeout {
            Some(t) => match timeout(t, reserve_and_send(writer, &mut remaining, end)).await {
                Ok(res) => res?,
                Err(_) => Error::e_explain(
                    WriteTimedout,
                    format!("while writing h2 request body, timeout: {t:?}"),
                )?,
            },
            None => {
                reserve_and_send(writer, &mut remaining, end).await?;
            }
        }
        if remaining.is_empty() {
            return Ok(());
        }
    }
}

#[cfg(test)]
mod test {
    use std::{sync::Arc, time::Duration};

    use bytes::Bytes;
    use futures::SinkExt;
    use h2::frame::*;
    use http::{HeaderMap, Method, Uri};
    use tokio::io::{duplex, AsyncReadExt, AsyncWriteExt, DuplexStream};
    use tokio_stream::StreamExt;

    use pingora_http::{RequestHeader, ResponseHeader};
    use pingora_timeout::sleep;

    use crate::protocols::{
        http::v2::server::{handshake, HttpSession},
        Digest,
    };

    #[tokio::test]
    async fn test_client_write_timeout() {
        let mut handles = vec![];

        let (client, mut server) = duplex(65536);

        // Client
        handles.push(tokio::spawn(async move {
            let conn = crate::connectors::http::v2::handshake(Box::new(client), 500, None)
                .await
                .unwrap();

            let mut h2_stream = conn.spawn_stream().await.unwrap().unwrap();
            h2_stream.write_timeout = Some(Duration::from_millis(100));

            let mut request = RequestHeader::build("GET", b"/", None).unwrap();
            request.insert_header("Host", "one.one.one.one").unwrap();

            h2_stream
                .write_request_header(Box::new(request), false)
                .unwrap();

            h2_stream.read_response_header().await.unwrap();
            assert_eq!(h2_stream.response_header().unwrap().status.as_u16(), 200);

            let err = h2_stream
                .write_request_body(Bytes::from_static(b"client body"), true)
                .await
                .err()
                .unwrap();
            assert_eq!(err.etype(), &pingora_error::ErrorType::WriteTimedout);
        }));

        // Server
        handles.push(tokio::spawn(async move {
            // 0. Prepare outbound frames
            let mut outbound: Vec<h2::frame::Frame<Bytes>> = Vec::new();

            let mut settings = Settings::default();

            settings.set_initial_window_size(Some(1));
            settings.set_max_concurrent_streams(Some(1));

            outbound.push(settings.into());
            outbound.push(Settings::ack().into());

            let headers = HeaderMap::new();

            outbound.push(
                Headers::new(1.into(), Pseudo::response(http::StatusCode::OK), headers).into(),
            );

            outbound.push(WindowUpdate::new(1.into(), 10000).into());

            // 1. Read preface from the client
            server.read_exact(&mut [0u8; 24]).await.unwrap();

            let mut server: h2::Codec<DuplexStream, Bytes> = h2::Codec::new(server);

            // 2. Drain client's frames
            for _ in 0..3 {
                _ = server.next().await.unwrap();
            }

            // 3. Send frames
            for (i, frame) in outbound.into_iter().enumerate() {
                if i == 3 {
                    // Delay WindowUpdate to trigger client side write timeout on capacity await
                    sleep(Duration::from_millis(200)).await;
                }
                _ = server.send(frame).await;
            }
        }));

        for handle in handles {
            // ensure no panics
            assert!(handle.await.is_ok());
        }
    }

    #[tokio::test]
    async fn test_server_write_timeout() {
        let mut handles = vec![];

        let (mut client, server) = duplex(65536);

        // Client
        handles.push(tokio::spawn(async move {
            // 0. Prepare outbound frames
            let mut outbound: Vec<h2::frame::Frame<Bytes>> = Vec::new();

            let mut settings = Settings::default();

            settings.set_initial_window_size(Some(1));
            settings.set_max_concurrent_streams(Some(1));
            outbound.push(settings.into());

            outbound.push(Settings::ack().into());

            let mut headers = Headers::new(
                1.into(),
                Pseudo::request(
                    Method::GET,
                    Uri::from_static("https://one.one.one.one"),
                    None,
                ),
                HeaderMap::new(),
            );
            headers.set_end_headers();
            outbound.push(headers.into());

            outbound.push(WindowUpdate::new(1.into(), 10000).into());

            // 1. Write h2 preface
            client
                .write_all(b"PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n")
                .await
                .unwrap();

            // 2. Send frames
            let mut client: h2::Codec<DuplexStream, Bytes> = h2::Codec::new(client);

            for (i, frame) in outbound.into_iter().enumerate() {
                if i == 3 {
                    // Delay WindowUpdate to trigger server side write timeout on capacity await
                    sleep(Duration::from_millis(200)).await;
                }
                _ = client.send(frame).await;
            }

            // 3. Drain server's frames
            for _ in 0..3 {
                _ = client.next().await.unwrap();
            }
        }));

        // Server
        let mut connection = handshake(Box::new(server), None).await.unwrap();
        let digest = Arc::new(Digest::default());

        while let Some(mut h2_stream) = HttpSession::from_h2_conn(&mut connection, digest.clone())
            .await
            .unwrap()
        {
            handles.push(tokio::spawn(async move {
                h2_stream.set_write_timeout(Some(Duration::from_millis(100)));
                let req = h2_stream.req_header();
                assert_eq!(req.method, Method::GET);

                let response_header = Box::new(ResponseHeader::build(200, None).unwrap());
                assert!(h2_stream
                    .write_response_header(response_header.clone(), false)
                    .is_ok());

                let err = h2_stream
                    .write_body(Bytes::from_static(b"server body"), true)
                    .await
                    .err()
                    .unwrap();
                assert_eq!(err.etype(), &pingora_error::ErrorType::WriteTimedout);
            }));
        }

        for handle in handles {
            // ensure no panics
            assert!(handle.await.is_ok());
        }
    }
}


================================================
FILE: pingora-core/src/protocols/http/v2/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP/2 server session

use bytes::Bytes;
use futures::Future;
use h2::server;
use h2::server::SendResponse;
use h2::{RecvStream, SendStream};
use http::header::HeaderName;
use http::uri::PathAndQuery;
use http::{header, HeaderMap, Response};
use log::{debug, warn};
use pingora_http::{RequestHeader, ResponseHeader};
use pingora_timeout::timeout;
use std::sync::Arc;
use std::task::ready;
use std::time::Duration;

use crate::protocols::http::body_buffer::FixedBuffer;
use crate::protocols::http::date::get_cached_date;
use crate::protocols::http::v1::client::http_req_header_to_wire;
use crate::protocols::http::HttpTask;
use crate::protocols::{Digest, SocketAddr, Stream};
use crate::{Error, ErrorType, OrErr, Result};

const BODY_BUF_LIMIT: usize = 1024 * 64;

type H2Connection<S> = server::Connection<S, Bytes>;

pub use h2::server::Builder as H2Options;

/// Perform HTTP/2 connection handshake with an established (TLS) connection.
///
/// The optional `options` allow to adjust certain HTTP/2 parameters and settings.
/// See [`H2Options`] for more details.
pub async fn handshake(io: Stream, options: Option<H2Options>) -> Result<H2Connection<Stream>> {
    let options = options.unwrap_or_default();
    let res = options.handshake(io).await;

    match res {
        Ok(connection) => {
            debug!("H2 handshake done.");
            Ok(connection)
        }
        Err(e) => Error::e_because(
            ErrorType::HandshakeError,
            "while h2 handshaking with client",
            e,
        ),
    }
}

use futures::task::Context;
use futures::task::Poll;
use std::pin::Pin;
/// The future to poll for an idle session.
///
/// Calling `.await` in this object will not return until the client decides to close this stream.
pub struct Idle<'a>(&'a mut HttpSession);

impl Future for Idle<'_> {
    type Output = Result<h2::Reason>;

    fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        if let Some(body_writer) = self.0.send_response_body.as_mut() {
            body_writer.poll_reset(cx)
        } else {
            self.0.send_response.poll_reset(cx)
        }
        .map_err(|e| Error::because(ErrorType::H2Error, "downstream error while idling", e))
    }
}

/// HTTP/2 server session
pub struct HttpSession {
    request_header: RequestHeader,
    request_body_reader: RecvStream,
    send_response: SendResponse<Bytes>,
    send_response_body: Option<SendStream<Bytes>>,
    // Remember what has been written
    response_written: Option<Box<ResponseHeader>>,
    // Indicate that whether a END_STREAM is already sent
    // in order to tell whether needs to send one extra FRAME when this response finishes
    ended: bool,
    // How many (application, not wire) request body bytes have been read so far.
    body_read: usize,
    // How many (application, not wire) response body bytes have been sent so far.
    body_sent: usize,
    // buffered request body for retry logic
    retry_buffer: Option<FixedBuffer>,
    // digest to record underlying connection info
    digest: Arc<Digest>,
    /// The write timeout which will be applied to writing response body.
    /// The timeout is reset on every write. This is not a timeout on the overall duration of the
    /// response.
    pub write_timeout: Option<Duration>,
    // How long to wait when draining (discarding) request body
    total_drain_timeout: Option<Duration>,
}

impl HttpSession {
    /// Create a new [`HttpSession`] from the HTTP/2 connection.
    /// This function returns a new HTTP/2 session when the provided HTTP/2 connection, `conn`,
    /// establishes a new HTTP/2 stream to this server.
    ///
    /// A [`Digest`] from the IO stream is also stored in the resulting session, since the
    /// session doesn't have access to the underlying stream (and the stream itself isn't
    /// accessible from the `h2::server::Connection`).
    ///
    /// Note: in order to handle all **existing** and new HTTP/2 sessions, the server must call
    /// this function in a loop until the client decides to close the connection.
    ///
    /// `None` will be returned when the connection is closing so that the loop can exit.
    ///
    pub async fn from_h2_conn(
        conn: &mut H2Connection<Stream>,
        digest: Arc<Digest>,
    ) -> Result<Option<Self>> {
        // NOTE: conn.accept().await is what drives the entire connection.
        let res = conn.accept().await.transpose().or_err(
            ErrorType::H2Error,
            "while accepting new downstream requests",
        )?;

        Ok(res.map(|(req, send_response)| {
            let (request_header, request_body_reader) = req.into_parts();
            HttpSession {
                request_header: request_header.into(),
                request_body_reader,
                send_response,
                send_response_body: None,
                response_written: None,
                ended: false,
                body_read: 0,
                body_sent: 0,
                retry_buffer: None,
                digest,
                write_timeout: None,
                total_drain_timeout: None,
            }
        }))
    }

    /// The request sent from the client
    ///
    /// Different from its HTTP/1.X counterpart, this function never panics as the request is already
    /// read when established a new HTTP/2 stream.
    pub fn req_header(&self) -> &RequestHeader {
        &self.request_header
    }

    /// A mutable reference to request sent from the client
    ///
    /// Different from its HTTP/1.X counterpart, this function never panics as the request is already
    /// read when established a new HTTP/2 stream.
    pub fn req_header_mut(&mut self) -> &mut RequestHeader {
        &mut self.request_header
    }

    /// Read request body bytes. `None` when there is no more body to read.
    pub async fn read_body_bytes(&mut self) -> Result<Option<Bytes>> {
        // TODO: timeout
        let data = self.request_body_reader.data().await.transpose().or_err(
            ErrorType::ReadError,
            "while reading downstream request body",
        )?;
        if let Some(data) = data.as_ref() {
            self.body_read += data.len();
            if let Some(buffer) = self.retry_buffer.as_mut() {
                buffer.write_to_buffer(data);
            }
            let _ = self
                .request_body_reader
                .flow_control()
                .release_capacity(data.len());
        }
        Ok(data)
    }

    #[doc(hidden)]
    pub fn poll_read_body_bytes(
        &mut self,
        cx: &mut Context<'_>,
    ) -> Poll<Option<Result<Bytes, h2::Error>>> {
        let data = match ready!(self.request_body_reader.poll_data(cx)).transpose() {
            Ok(data) => data,
            Err(err) => return Poll::Ready(Some(Err(err))),
        };

        if let Some(data) = data {
            self.body_read += data.len();
            self.request_body_reader
                .flow_control()
                .release_capacity(data.len())?;
            return Poll::Ready(Some(Ok(data)));
        }

        Poll::Ready(None)
    }

    async fn do_drain_request_body(&mut self) -> Result<()> {
        loop {
            match self.read_body_bytes().await {
                Ok(Some(_)) => { /* continue to drain */ }
                Ok(None) => return Ok(()), // done
                Err(e) => return Err(e),
            }
        }
    }

    /// Drain the request body. `Ok(())` when there is no (more) body to read.
    // NOTE for h2 it may be worth allowing cancellation of the stream via reset.
    pub async fn drain_request_body(&mut self) -> Result<()> {
        if self.is_body_done() {
            return Ok(());
        }
        match self.total_drain_timeout {
            Some(t) => match timeout(t, self.do_drain_request_body()).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(
                    ErrorType::ReadTimedout,
                    format!("draining body, timeout: {t:?}"),
                ),
            },
            None => self.do_drain_request_body().await,
        }
    }

    /// Sets the downstream write timeout. This will trigger if we're unable
    /// to write to the stream after `timeout`.
    pub fn set_write_timeout(&mut self, timeout: Option<Duration>) {
        self.write_timeout = timeout;
    }

    /// Get the write timeout.
    pub fn get_write_timeout(&self) -> Option<Duration> {
        self.write_timeout
    }

    /// Sets the total drain timeout. This `timeout` will be used while draining
    /// the request body.
    pub fn set_total_drain_timeout(&mut self, timeout: Option<Duration>) {
        self.total_drain_timeout = timeout;
    }

    /// Get the total drain timeout.
    pub fn get_total_drain_timeout(&self) -> Option<Duration> {
        self.total_drain_timeout
    }

    // the write_* don't have timeouts because the actual writing happens on the connection
    // not here.

    /// Write the response header to the client.
    /// # the `end` flag
    /// `end` marks the end of this session.
    /// If the `end` flag is set, no more header or body can be sent to the client.
    pub fn write_response_header(
        &mut self,
        mut header: Box<ResponseHeader>,
        end: bool,
    ) -> Result<()> {
        if self.ended {
            // TODO: error or warn?
            return Ok(());
        }

        if header.status.is_informational() {
            // ignore informational response 1xx header because send_response() can only be called once
            // https://github.com/hyperium/h2/issues/167
            debug!("ignoring informational headers");
            return Ok(());
        }

        if self.response_written.as_ref().is_some() {
            warn!("Response header is already sent, cannot send again");
            return Ok(());
        }

        /* update headers */
        header.insert_header(header::DATE, get_cached_date())?;

        // remove other h1 hop headers that cannot be present in H2
        // https://httpwg.org/specs/rfc7540.html#n-connection-specific-header-fields
        header.remove_header(&header::TRANSFER_ENCODING);
        header.remove_header(&header::CONNECTION);
        header.remove_header(&header::UPGRADE);
        header.remove_header(&HeaderName::from_static("keep-alive"));
        header.remove_header(&HeaderName::from_static("proxy-connection"));

        let resp = Response::from_parts(header.as_owned_parts(), ());

        let body_writer = self.send_response.send_response(resp, end).or_err(
            ErrorType::WriteError,
            "while writing h2 response to downstream",
        )?;

        self.response_written = Some(header);
        self.send_response_body = Some(body_writer);
        self.ended = self.ended || end;
        Ok(())
    }

    /// Write response body to the client. See [Self::write_response_header] for how to use `end`.
    pub async fn write_body(&mut self, data: Bytes, end: bool) -> Result<()> {
        match self.write_timeout {
            Some(t) => match timeout(t, self.do_write_body(data, end)).await {
                Ok(res) => res,
                Err(_) => Error::e_explain(
                    ErrorType::WriteTimedout,
                    format!("writing body, timeout: {t:?}"),
                ),
            },
            None => self.do_write_body(data, end).await,
        }
    }

    async fn do_write_body(&mut self, data: Bytes, end: bool) -> Result<()> {
        if self.ended {
            // NOTE: in h1, we also track to see if content-length matches the data
            // We have not tracked that in h2
            warn!("Try to write body after end of stream, dropping the extra data");
            return Ok(());
        }
        let Some(writer) = self.send_response_body.as_mut() else {
            return Err(Error::explain(
                ErrorType::H2Error,
                "try to send body before header is sent",
            ));
        };
        let data_len = data.len();
        super::write_body(writer, data, end, self.write_timeout)
            .await
            .map_err(|e| e.into_down())?;
        self.body_sent += data_len;
        self.ended = self.ended || end;
        Ok(())
    }

    /// Write response trailers to the client, this also closes the stream.
    pub fn write_trailers(&mut self, trailers: HeaderMap) -> Result<()> {
        if self.ended {
            warn!("Tried to write trailers after end of stream, dropping them");
            return Ok(());
        }
        let Some(writer) = self.send_response_body.as_mut() else {
            return Err(Error::explain(
                ErrorType::H2Error,
                "try to send trailers before header is sent",
            ));
        };
        writer.send_trailers(trailers).or_err(
            ErrorType::WriteError,
            "while writing h2 response trailers to downstream",
        )?;
        // sending trailers closes the stream
        self.ended = true;
        Ok(())
    }

    /// Similar to [Self::write_response_header], this function takes a reference instead
    pub fn write_response_header_ref(&mut self, header: &ResponseHeader, end: bool) -> Result<()> {
        self.write_response_header(Box::new(header.clone()), end)
    }

    // TODO: trailer

    /// Mark the session end. If no `end` flag is already set before this call, this call will
    /// signal the client. Otherwise this call does nothing.
    ///
    /// Dropping this object without sending `end` will cause an error to the client, which will cause
    /// the client to treat this session as bad or incomplete.
    pub fn finish(&mut self) -> Result<()> {
        if self.ended {
            // already ended the stream
            return Ok(());
        }
        if let Some(writer) = self.send_response_body.as_mut() {
            // use an empty data frame to signal the end
            writer.send_data("".into(), true).or_err(
                ErrorType::WriteError,
                "while writing h2 response body to downstream",
            )?;
            self.ended = true;
        };
        // else: the response header is not sent, do nothing now.
        // When send_response_body is dropped, an RST_STREAM will be sent

        Ok(())
    }

    pub async fn response_duplex_vec(&mut self, tasks: Vec<HttpTask>) -> Result<bool> {
        let mut end_stream = false;
        for task in tasks.into_iter() {
            end_stream = match task {
                HttpTask::Header(header, end) => {
                    self.write_response_header(header, end)
                        .map_err(|e| e.into_down())?;
                    end
                }
                HttpTask::Body(data, end) => match data {
                    Some(d) => {
                        if !d.is_empty() {
                            self.write_body(d, end).await.map_err(|e| e.into_down())?;
                        }
                        end
                    }
                    None => end,
                },
                HttpTask::UpgradedBody(..) => {
                    // Seeing an Upgraded body means that the upstream session
                    // was H1.1 that upgraded.
                    //
                    // While the downstream H2 session may encapsulate the opaque body bytes,
                    // this represents an undefined discrepancy and change between how
                    // the upstream and downstream sessions began intepreting the response body.
                    return Error::e_explain(
                        ErrorType::InternalError,
                        "upgraded body on h2 server session",
                    );
                }
                HttpTask::Trailer(Some(trailers)) => {
                    self.write_trailers(*trailers)?;
                    true
                }
                HttpTask::Trailer(None) => true,
                HttpTask::Done => true,
                HttpTask::Failed(e) => {
                    return Err(e);
                }
            } || end_stream // safe guard in case `end` in tasks flips from true to false
        }
        if end_stream {
            // no-op if finished already
            self.finish().map_err(|e| e.into_down())?;
        }
        Ok(end_stream)
    }

    /// Return a string `$METHOD $PATH, Host: $HOST`. Mostly for logging and debug purpose
    pub fn request_summary(&self) -> String {
        format!(
            "{} {}, Host: {}:{}",
            self.request_header.method,
            self.request_header
                .uri
                .path_and_query()
                .map(PathAndQuery::as_str)
                .unwrap_or_default(),
            self.request_header.uri.host().unwrap_or_default(),
            self.req_header()
                .uri
                .port()
                .as_ref()
                .map(|port| port.as_str())
                .unwrap_or_default()
        )
    }

    /// Return the written response header. `None` if it is not written yet.
    pub fn response_written(&self) -> Option<&ResponseHeader> {
        self.response_written.as_deref()
    }

    /// Give up the stream abruptly.
    ///
    /// This will send a `INTERNAL_ERROR` stream error to the client
    pub fn shutdown(&mut self) {
        if !self.ended {
            self.send_response.send_reset(h2::Reason::INTERNAL_ERROR);
        }
    }

    #[doc(hidden)]
    pub fn take_response_body_writer(&mut self) -> Option<SendStream<Bytes>> {
        self.send_response_body.take()
    }

    // This is a hack for pingora-proxy to create subrequests from h2 server session
    // TODO: be able to convert from h2 to h1 subrequest
    pub fn pseudo_raw_h1_request_header(&self) -> Bytes {
        let buf = http_req_header_to_wire(&self.request_header).unwrap(); // safe, None only when version unknown
        buf.freeze()
    }

    /// Whether there is no more body to read
    pub fn is_body_done(&self) -> bool {
        // Check no body in request
        // Also check we hit end of stream
        self.is_body_empty() || self.request_body_reader.is_end_stream()
    }

    /// Whether there is any body to read. true means there no body in request.
    pub fn is_body_empty(&self) -> bool {
        self.body_read == 0
            && (self.request_body_reader.is_end_stream()
                || self
                    .request_header
                    .headers
                    .get(header::CONTENT_LENGTH)
                    .is_some_and(|cl| cl.as_bytes() == b"0"))
    }

    pub fn retry_buffer_truncated(&self) -> bool {
        self.retry_buffer
            .as_ref()
            .map_or_else(|| false, |r| r.is_truncated())
    }

    pub fn enable_retry_buffering(&mut self) {
        if self.retry_buffer.is_none() {
            self.retry_buffer = Some(FixedBuffer::new(BODY_BUF_LIMIT))
        }
    }

    pub fn get_retry_buffer(&self) -> Option<Bytes> {
        self.retry_buffer.as_ref().and_then(|b| {
            if b.is_truncated() {
                None
            } else {
                b.get_buffer()
            }
        })
    }

    /// `async fn idle() -> Result<Reason, Error>;`
    /// This async fn will be pending forever until the client closes the stream/connection
    /// This function is used for watching client status so that the server is able to cancel
    /// its internal tasks as the client waiting for the tasks goes away
    pub fn idle(&mut self) -> Idle<'_> {
        Idle(self)
    }

    /// Similar to `read_body_bytes()` but will be pending after Ok(None) is returned,
    /// until the client closes the connection
    pub async fn read_body_or_idle(&mut self, no_body_expected: bool) -> Result<Option<Bytes>> {
        if no_body_expected || self.is_body_done() {
            let reason = self.idle().await?;
            Error::e_explain(
                ErrorType::H2Error,
                format!("Client closed H2, reason: {reason}"),
            )
        } else {
            self.read_body_bytes().await
        }
    }

    /// Return how many response body bytes (application, not wire) already sent downstream
    pub fn body_bytes_sent(&self) -> usize {
        self.body_sent
    }

    /// Return how many request body bytes (application, not wire) already read from downstream
    pub fn body_bytes_read(&self) -> usize {
        self.body_read
    }

    /// Return the [Digest] of the connection.
    pub fn digest(&self) -> Option<&Digest> {
        Some(&self.digest)
    }

    /// Return a mutable [Digest] reference for the connection.
    pub fn digest_mut(&mut self) -> Option<&mut Digest> {
        Arc::get_mut(&mut self.digest)
    }

    /// Return the server (local) address recorded in the connection digest.
    pub fn server_addr(&self) -> Option<&SocketAddr> {
        self.digest.socket_digest.as_ref().map(|d| d.local_addr())?
    }

    /// Return the client (peer) address recorded in the connection digest.
    pub fn client_addr(&self) -> Option<&SocketAddr> {
        self.digest.socket_digest.as_ref().map(|d| d.peer_addr())?
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use http::{HeaderValue, Method, Request};
    use tokio::io::duplex;

    #[tokio::test]
    async fn test_server_handshake_accept_request() {
        let (client, server) = duplex(65536);
        let client_body = "test client body";
        let server_body = "test server body";

        let mut expected_trailers = HeaderMap::new();
        expected_trailers.insert("test", HeaderValue::from_static("trailers"));
        let trailers = expected_trailers.clone();

        let mut handles = vec![];
        handles.push(tokio::spawn(async move {
            let (h2, connection) = h2::client::handshake(client).await.unwrap();
            tokio::spawn(async move {
                connection.await.unwrap();
            });

            let mut h2 = h2.ready().await.unwrap();

            let request = Request::builder()
                .method(Method::GET)
                .uri("https://www.example.com/")
                .body(())
                .unwrap();

            let (response, mut req_body) = h2.send_request(request, false).unwrap();
            req_body.reserve_capacity(client_body.len());
            req_body.send_data(client_body.into(), true).unwrap();

            let (head, mut body) = response.await.unwrap().into_parts();
            assert_eq!(head.status, 200);
            let data = body.data().await.unwrap().unwrap();
            assert_eq!(data, server_body);
            let resp_trailers = body.trailers().await.unwrap().unwrap();
            assert_eq!(resp_trailers, expected_trailers);
        }));

        let mut connection = handshake(Box::new(server), None).await.unwrap();
        let digest = Arc::new(Digest::default());

        while let Some(mut http) = HttpSession::from_h2_conn(&mut connection, digest.clone())
            .await
            .unwrap()
        {
            let trailers = trailers.clone();
            handles.push(tokio::spawn(async move {
                let req = http.req_header();
                assert_eq!(req.method, Method::GET);
                assert_eq!(req.uri, "https://www.example.com/");

                http.enable_retry_buffering();

                assert!(!http.is_body_empty());
                assert!(!http.is_body_done());

                let body = http.read_body_or_idle(false).await.unwrap().unwrap();
                assert_eq!(body, client_body);
                assert!(http.is_body_done());
                assert_eq!(http.body_bytes_read(), 16);

                let retry_body = http.get_retry_buffer().unwrap();
                assert_eq!(retry_body, client_body);

                // test idling before response header is sent
                tokio::select! {
                    _ = http.idle() => {panic!("downstream should be idling")},
                    _= tokio::time::sleep(tokio::time::Duration::from_secs(1)) => {}
                }

                let response_header = Box::new(ResponseHeader::build(200, None).unwrap());
                assert!(http
                    .write_response_header(response_header.clone(), false)
                    .is_ok());
                // this write should be ignored otherwise we will error
                assert!(http.write_response_header(response_header, false).is_ok());

                // test idling after response header is sent
                tokio::select! {
                    _ = http.read_body_or_idle(false) => {panic!("downstream should be idling")},
                    _= tokio::time::sleep(tokio::time::Duration::from_secs(1)) => {}
                }

                // end: false here to verify finish() closes the stream nicely
                http.write_body(server_body.into(), false).await.unwrap();
                assert_eq!(http.body_bytes_sent(), 16);

                http.write_trailers(trailers).unwrap();
                http.finish().unwrap();
            }));
        }
        for handle in handles {
            // ensure no panics
            assert!(handle.await.is_ok());
        }
    }

    #[tokio::test]
    async fn test_req_content_length_eq_0_and_no_header_eos() {
        let (client, server) = duplex(65536);

        let server_body = "test server body";

        let mut handles = vec![];

        handles.push(tokio::spawn(async move {
            let (h2, connection) = h2::client::handshake(client).await.unwrap();
            tokio::spawn(async move {
                connection.await.unwrap();
            });

            let mut h2 = h2.ready().await.unwrap();

            let request = Request::builder()
                .method(Method::POST)
                .uri("https://www.example.com/")
                .header("content-length", "0") // explicitly set
                .body(())
                .unwrap();

            let (response, mut req_body) = h2.send_request(request, false).unwrap(); // no EOS

            let (head, mut body) = response.await.unwrap().into_parts();

            assert_eq!(head.status, 200);
            let data = body.data().await.unwrap().unwrap();
            assert_eq!(data, server_body);

            req_body.send_data("".into(), true).unwrap(); // set EOS after read the resp body
        }));

        let mut connection = handshake(Box::new(server), None).await.unwrap();
        let digest = Arc::new(Digest::default());

        while let Some(mut http) = HttpSession::from_h2_conn(&mut connection, digest.clone())
            .await
            .unwrap()
        {
            handles.push(tokio::spawn(async move {
                let req = http.req_header();
                assert_eq!(req.method, Method::POST);
                assert_eq!(req.uri, "https://www.example.com/");

                // 1. Check body related methods
                http.enable_retry_buffering();
                assert!(http.is_body_empty());
                assert!(http.is_body_done());
                let retry_body = http.get_retry_buffer();
                assert!(retry_body.is_none());

                // 2. Send response
                let response_header = Box::new(ResponseHeader::build(200, None).unwrap());
                assert!(http
                    .write_response_header(response_header.clone(), false)
                    .is_ok());

                http.write_body(server_body.into(), false).await.unwrap();
                assert_eq!(http.body_bytes_sent(), 16);

                // 3. Waiting for the reset from the client
                assert!(http.read_body_or_idle(http.is_body_done()).await.is_err());
            }));
        }

        for handle in handles {
            // ensure no panics
            assert!(handle.await.is_ok());
        }
    }

    #[tokio::test]
    async fn test_req_header_no_eos_empty_data_with_eos() {
        let (client, server) = duplex(65536);

        let server_body = "test server body";

        let mut handles = vec![];

        handles.push(tokio::spawn(async move {
            let (h2, connection) = h2::client::handshake(client).await.unwrap();
            tokio::spawn(async move {
                connection.await.unwrap();
            });

            let mut h2 = h2.ready().await.unwrap();

            let request = Request::builder()
                .method(Method::POST)
                .uri("https://www.example.com/")
                .body(())
                .unwrap();

            let (response, mut req_body) = h2.send_request(request, false).unwrap(); // no EOS

            let (head, mut body) = response.await.unwrap().into_parts();

            assert_eq!(head.status, 200);
            let data = body.data().await.unwrap().unwrap();
            assert_eq!(data, server_body);

            req_body.send_data("".into(), true).unwrap(); // set EOS after read the resp body
        }));

        let mut connection = handshake(Box::new(server), None).await.unwrap();
        let digest = Arc::new(Digest::default());

        while let Some(mut http) = HttpSession::from_h2_conn(&mut connection, digest.clone())
            .await
            .unwrap()
        {
            handles.push(tokio::spawn(async move {
                let req = http.req_header();
                assert_eq!(req.method, Method::POST);
                assert_eq!(req.uri, "https://www.example.com/");

                // 1. Check body related methods
                http.enable_retry_buffering();
                assert!(!http.is_body_empty());
                assert!(!http.is_body_done());
                let retry_body = http.get_retry_buffer();
                assert!(retry_body.is_none());

                // 2. Send response
                let response_header = Box::new(ResponseHeader::build(200, None).unwrap());
                assert!(http
                    .write_response_header(response_header.clone(), false)
                    .is_ok());

                http.write_body(server_body.into(), false).await.unwrap();
                assert_eq!(http.body_bytes_sent(), 16);

                // 3. Waiting for the client to close stream.
                http.read_body_or_idle(http.is_body_done()).await.unwrap();
            }));
        }

        for handle in handles {
            // ensure no panics
            assert!(handle.await.is_ok());
        }
    }
}


================================================
FILE: pingora-core/src/protocols/l4/ext.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Extensions to the regular TCP APIs

#![allow(non_camel_case_types)]

#[cfg(unix)]
use libc::socklen_t;
#[cfg(target_os = "linux")]
use libc::{c_int, c_ulonglong, c_void};
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use std::io::{self, ErrorKind};
use std::mem;
use std::net::SocketAddr;
#[cfg(unix)]
use std::os::unix::io::{AsRawFd, RawFd};
#[cfg(windows)]
use std::os::windows::io::{AsRawSocket, RawSocket};
use std::time::Duration;
#[cfg(unix)]
use tokio::net::UnixStream;
use tokio::net::{TcpSocket, TcpStream};

use crate::connectors::l4::BindTo;

/// The (copy of) the kernel struct tcp_info returns
#[repr(C)]
#[derive(Copy, Clone, Debug)]
pub struct TCP_INFO {
    pub tcpi_state: u8,
    pub tcpi_ca_state: u8,
    pub tcpi_retransmits: u8,
    pub tcpi_probes: u8,
    pub tcpi_backoff: u8,
    pub tcpi_options: u8,
    pub tcpi_snd_wscale_4_rcv_wscale_4: u8,
    pub tcpi_delivery_rate_app_limited: u8,
    pub tcpi_rto: u32,
    pub tcpi_ato: u32,
    pub tcpi_snd_mss: u32,
    pub tcpi_rcv_mss: u32,
    pub tcpi_unacked: u32,
    pub tcpi_sacked: u32,
    pub tcpi_lost: u32,
    pub tcpi_retrans: u32,
    pub tcpi_fackets: u32,
    pub tcpi_last_data_sent: u32,
    pub tcpi_last_ack_sent: u32,
    pub tcpi_last_data_recv: u32,
    pub tcpi_last_ack_recv: u32,
    pub tcpi_pmtu: u32,
    pub tcpi_rcv_ssthresh: u32,
    pub tcpi_rtt: u32,
    pub tcpi_rttvar: u32,
    pub tcpi_snd_ssthresh: u32,
    pub tcpi_snd_cwnd: u32,
    pub tcpi_advmss: u32,
    pub tcpi_reordering: u32,
    pub tcpi_rcv_rtt: u32,
    pub tcpi_rcv_space: u32,
    pub tcpi_total_retrans: u32,
    pub tcpi_pacing_rate: u64,
    pub tcpi_max_pacing_rate: u64,
    pub tcpi_bytes_acked: u64,
    pub tcpi_bytes_received: u64,
    pub tcpi_segs_out: u32,
    pub tcpi_segs_in: u32,
    pub tcpi_notsent_bytes: u32,
    pub tcpi_min_rtt: u32,
    pub tcpi_data_segs_in: u32,
    pub tcpi_data_segs_out: u32,
    pub tcpi_delivery_rate: u64,
    pub tcpi_busy_time: u64,
    pub tcpi_rwnd_limited: u64,
    pub tcpi_sndbuf_limited: u64,
    pub tcpi_delivered: u32,
    pub tcpi_delivered_ce: u32,
    pub tcpi_bytes_sent: u64,
    pub tcpi_bytes_retrans: u64,
    pub tcpi_dsack_dups: u32,
    pub tcpi_reord_seen: u32,
    pub tcpi_rcv_ooopack: u32,
    pub tcpi_snd_wnd: u32,
    pub tcpi_rcv_wnd: u32,
    // and more, see include/linux/tcp.h
}

impl TCP_INFO {
    /// Create a new zeroed out [`TCP_INFO`]
    pub unsafe fn new() -> Self {
        mem::zeroed()
    }

    /// Return the size of [`TCP_INFO`]
    #[cfg(unix)]
    pub fn len() -> socklen_t {
        mem::size_of::<Self>() as socklen_t
    }

    /// Return the size of [`TCP_INFO`]
    #[cfg(windows)]
    pub fn len() -> usize {
        mem::size_of::<Self>()
    }
}

#[cfg(target_os = "linux")]
fn set_opt<T: Copy>(sock: c_int, opt: c_int, val: c_int, payload: T) -> io::Result<()> {
    unsafe {
        let payload = &payload as *const T as *const c_void;
        cvt_linux_error(libc::setsockopt(
            sock,
            opt,
            val,
            payload as *const _,
            mem::size_of::<T>() as socklen_t,
        ))?;
        Ok(())
    }
}

#[cfg(target_os = "linux")]
fn get_opt<T>(
    sock: c_int,
    opt: c_int,
    val: c_int,
    payload: &mut T,
    size: &mut socklen_t,
) -> io::Result<()> {
    unsafe {
        let payload = payload as *mut T as *mut c_void;
        cvt_linux_error(libc::getsockopt(sock, opt, val, payload as *mut _, size))?;
        Ok(())
    }
}

#[cfg(target_os = "linux")]
fn get_opt_sized<T>(sock: c_int, opt: c_int, val: c_int) -> io::Result<T> {
    let mut payload = mem::MaybeUninit::zeroed();
    let expected_size = mem::size_of::<T>() as socklen_t;
    let mut size = expected_size;
    get_opt(sock, opt, val, &mut payload, &mut size)?;

    if size != expected_size {
        return Err(std::io::Error::other("get_opt size mismatch"));
    }
    // Assume getsockopt() will set the value properly
    let payload = unsafe { payload.assume_init() };
    Ok(payload)
}

#[cfg(target_os = "linux")]
fn cvt_linux_error(t: i32) -> io::Result<i32> {
    if t == -1 {
        Err(io::Error::last_os_error())
    } else {
        Ok(t)
    }
}

#[cfg(target_os = "linux")]
fn ip_bind_addr_no_port(fd: RawFd, val: bool) -> io::Result<()> {
    set_opt(
        fd,
        libc::IPPROTO_IP,
        libc::IP_BIND_ADDRESS_NO_PORT,
        val as c_int,
    )
}

#[cfg(all(unix, not(target_os = "linux")))]
fn ip_bind_addr_no_port(_fd: RawFd, _val: bool) -> io::Result<()> {
    Ok(())
}

/// IP_LOCAL_PORT_RANGE is only supported on Linux 6.3 and higher,
/// ip_local_port_range() is a no-op on unsupported versions.
/// See the [man page](https://man7.org/linux/man-pages/man7/ip.7.html) for more details.
#[cfg(target_os = "linux")]
fn ip_local_port_range(fd: RawFd, low: u16, high: u16) -> io::Result<()> {
    const IP_LOCAL_PORT_RANGE: i32 = 51;
    let range: u32 = (low as u32) | ((high as u32) << 16);

    let result = set_opt(fd, libc::IPPROTO_IP, IP_LOCAL_PORT_RANGE, range as c_int);
    match result {
        Err(e) if e.raw_os_error() != Some(libc::ENOPROTOOPT) => Err(e),
        _ => Ok(()), // no error or ENOPROTOOPT
    }
}

#[cfg(all(unix, not(target_os = "linux")))]
fn ip_local_port_range(_fd: RawFd, _low: u16, _high: u16) -> io::Result<()> {
    Ok(())
}

#[cfg(windows)]
fn ip_local_port_range(_fd: RawSocket, _low: u16, _high: u16) -> io::Result<()> {
    Ok(())
}

#[cfg(target_os = "linux")]
fn set_so_keepalive(fd: RawFd, val: bool) -> io::Result<()> {
    set_opt(fd, libc::SOL_SOCKET, libc::SO_KEEPALIVE, val as c_int)
}

#[cfg(target_os = "linux")]
fn set_so_keepalive_idle(fd: RawFd, val: Duration) -> io::Result<()> {
    set_opt(
        fd,
        libc::IPPROTO_TCP,
        libc::TCP_KEEPIDLE,
        val.as_secs() as c_int, // only the seconds part of val is used
    )
}

#[cfg(target_os = "linux")]
fn set_so_keepalive_user_timeout(fd: RawFd, val: Duration) -> io::Result<()> {
    set_opt(
        fd,
        libc::IPPROTO_TCP,
        libc::TCP_USER_TIMEOUT,
        val.as_millis() as c_int, // only the ms part of val is used
    )
}

#[cfg(target_os = "linux")]
fn set_so_keepalive_interval(fd: RawFd, val: Duration) -> io::Result<()> {
    set_opt(
        fd,
        libc::IPPROTO_TCP,
        libc::TCP_KEEPINTVL,
        val.as_secs() as c_int, // only the seconds part of val is used
    )
}

#[cfg(target_os = "linux")]
fn set_so_keepalive_count(fd: RawFd, val: usize) -> io::Result<()> {
    set_opt(fd, libc::IPPROTO_TCP, libc::TCP_KEEPCNT, val as c_int)
}

#[cfg(target_os = "linux")]
fn set_keepalive(fd: RawFd, ka: &TcpKeepalive) -> io::Result<()> {
    set_so_keepalive(fd, true)?;
    set_so_keepalive_idle(fd, ka.idle)?;
    set_so_keepalive_interval(fd, ka.interval)?;
    set_so_keepalive_count(fd, ka.count)?;
    set_so_keepalive_user_timeout(fd, ka.user_timeout)
}

#[cfg(all(unix, not(target_os = "linux")))]
fn set_keepalive(_fd: RawFd, _ka: &TcpKeepalive) -> io::Result<()> {
    Ok(())
}

#[cfg(windows)]
fn set_keepalive(_sock: RawSocket, _ka: &TcpKeepalive) -> io::Result<()> {
    Ok(())
}

/// Get the kernel TCP_INFO for the given FD.
#[cfg(target_os = "linux")]
pub fn get_tcp_info(fd: RawFd) -> io::Result<TCP_INFO> {
    get_opt_sized(fd, libc::IPPROTO_TCP, libc::TCP_INFO)
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn get_tcp_info(_fd: RawFd) -> io::Result<TCP_INFO> {
    Ok(unsafe { TCP_INFO::new() })
}

#[cfg(windows)]
pub fn get_tcp_info(_fd: RawSocket) -> io::Result<TCP_INFO> {
    Ok(unsafe { TCP_INFO::new() })
}

/// Set the TCP receive buffer size. See SO_RCVBUF.
#[cfg(target_os = "linux")]
pub fn set_recv_buf(fd: RawFd, val: usize) -> Result<()> {
    set_opt(fd, libc::SOL_SOCKET, libc::SO_RCVBUF, val as c_int)
        .or_err(ConnectError, "failed to set SO_RCVBUF")
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn set_recv_buf(_fd: RawFd, _: usize) -> Result<()> {
    Ok(())
}

#[cfg(windows)]
pub fn set_recv_buf(_sock: RawSocket, _: usize) -> Result<()> {
    Ok(())
}

/// Set the TCP send buffer size. See SO_SNDBUF.
#[cfg(target_os = "linux")]
pub fn set_snd_buf(fd: RawFd, val: usize) -> Result<()> {
    set_opt(fd, libc::SOL_SOCKET, libc::SO_SNDBUF, val as c_int)
        .or_err(ConnectError, "failed to set SO_SNDBUF")
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn set_snd_buf(_fd: RawFd, _: usize) -> Result<()> {
    Ok(())
}

#[cfg(windows)]
pub fn set_snd_buf(_sock: RawSocket, _: usize) -> Result<()> {
    Ok(())
}

#[cfg(target_os = "linux")]
pub fn get_recv_buf(fd: RawFd) -> io::Result<usize> {
    get_opt_sized::<c_int>(fd, libc::SOL_SOCKET, libc::SO_RCVBUF).map(|v| v as usize)
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn get_recv_buf(_fd: RawFd) -> io::Result<usize> {
    Ok(0)
}

#[cfg(windows)]
pub fn get_recv_buf(_sock: RawSocket) -> io::Result<usize> {
    Ok(0)
}

#[cfg(target_os = "linux")]
pub fn get_snd_buf(fd: RawFd) -> io::Result<usize> {
    get_opt_sized::<c_int>(fd, libc::SOL_SOCKET, libc::SO_SNDBUF).map(|v| v as usize)
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn get_snd_buf(_fd: RawFd) -> io::Result<usize> {
    Ok(0)
}

#[cfg(windows)]
pub fn get_snd_buf(_sock: RawSocket) -> io::Result<usize> {
    Ok(0)
}

/// Enable client side TCP fast open.
#[cfg(target_os = "linux")]
pub fn set_tcp_fastopen_connect(fd: RawFd) -> Result<()> {
    set_opt(
        fd,
        libc::IPPROTO_TCP,
        libc::TCP_FASTOPEN_CONNECT,
        1 as c_int,
    )
    .or_err(ConnectError, "failed to set TCP_FASTOPEN_CONNECT")
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn set_tcp_fastopen_connect(_fd: RawFd) -> Result<()> {
    Ok(())
}

#[cfg(windows)]
pub fn set_tcp_fastopen_connect(_sock: RawSocket) -> Result<()> {
    Ok(())
}

/// Enable server side TCP fast open.
#[cfg(target_os = "linux")]
pub fn set_tcp_fastopen_backlog(fd: RawFd, backlog: usize) -> Result<()> {
    set_opt(fd, libc::IPPROTO_TCP, libc::TCP_FASTOPEN, backlog as c_int)
        .or_err(ConnectError, "failed to set TCP_FASTOPEN")
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn set_tcp_fastopen_backlog(_fd: RawFd, _backlog: usize) -> Result<()> {
    Ok(())
}

#[cfg(windows)]
pub fn set_tcp_fastopen_backlog(_sock: RawSocket, _backlog: usize) -> Result<()> {
    Ok(())
}

#[cfg(target_os = "linux")]
pub fn set_dscp(fd: RawFd, value: u8) -> Result<()> {
    use super::socket::SocketAddr;
    use pingora_error::OkOrErr;

    let sock = SocketAddr::from_raw_fd(fd, false);
    let addr = sock
        .as_ref()
        .and_then(|s| s.as_inet())
        .or_err(SocketError, "failed to set dscp, invalid IP socket")?;

    if addr.is_ipv6() {
        set_opt(fd, libc::IPPROTO_IPV6, libc::IPV6_TCLASS, value as c_int)
            .or_err(SocketError, "failed to set dscp (IPV6_TCLASS)")
    } else {
        set_opt(fd, libc::IPPROTO_IP, libc::IP_TOS, value as c_int)
            .or_err(SocketError, "failed to set dscp (IP_TOS)")
    }
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn set_dscp(_fd: RawFd, _value: u8) -> Result<()> {
    Ok(())
}

#[cfg(windows)]
pub fn set_dscp(_sock: RawSocket, _value: u8) -> Result<()> {
    Ok(())
}

#[cfg(target_os = "linux")]
pub fn get_socket_cookie(fd: RawFd) -> io::Result<u64> {
    get_opt_sized::<c_ulonglong>(fd, libc::SOL_SOCKET, libc::SO_COOKIE)
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn get_socket_cookie(_fd: RawFd) -> io::Result<u64> {
    Ok(0) // SO_COOKIE is a Linux concept
}

#[cfg(target_os = "linux")]
pub fn get_original_dest(fd: RawFd) -> Result<Option<SocketAddr>> {
    use super::socket;
    use pingora_error::OkOrErr;
    use std::net::{SocketAddrV4, SocketAddrV6};

    let sock = socket::SocketAddr::from_raw_fd(fd, false);
    let addr = sock
        .as_ref()
        .and_then(|s| s.as_inet())
        .or_err(SocketError, "failed get original dest, invalid IP socket")?;

    let dest = if addr.is_ipv4() {
        get_opt_sized::<libc::sockaddr_in>(fd, libc::SOL_IP, libc::SO_ORIGINAL_DST).map(|addr| {
            SocketAddr::V4(SocketAddrV4::new(
                u32::from_be(addr.sin_addr.s_addr).into(),
                u16::from_be(addr.sin_port),
            ))
        })
    } else {
        get_opt_sized::<libc::sockaddr_in6>(fd, libc::SOL_IPV6, libc::IP6T_SO_ORIGINAL_DST).map(
            |addr| {
                SocketAddr::V6(SocketAddrV6::new(
                    addr.sin6_addr.s6_addr.into(),
                    u16::from_be(addr.sin6_port),
                    addr.sin6_flowinfo,
                    addr.sin6_scope_id,
                ))
            },
        )
    };
    dest.or_err(SocketError, "failed to get original dest")
        .map(Some)
}

#[cfg(all(unix, not(target_os = "linux")))]
pub fn get_original_dest(_fd: RawFd) -> Result<Option<SocketAddr>> {
    Ok(None)
}

#[cfg(windows)]
pub fn get_original_dest(_sock: RawSocket) -> Result<Option<SocketAddr>> {
    Ok(None)
}

/// connect() to the given address while optionally binding to the specific source address and port range.
///
/// The `set_socket` callback can be used to tune the socket before `connect()` is called.
///
/// If a [`BindTo`] is set with a port range and fallback setting enabled this function will retry
/// on EADDRNOTAVAIL ignoring the port range.
///
/// `IP_BIND_ADDRESS_NO_PORT` is used.
/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
pub(crate) async fn connect_with<F: FnOnce(&TcpSocket) -> Result<()> + Clone>(
    addr: &SocketAddr,
    bind_to: Option<&BindTo>,
    set_socket: F,
) -> Result<TcpStream> {
    if bind_to.as_ref().is_some_and(|b| b.will_fallback()) {
        // if we see an EADDRNOTAVAIL error clear the port range and try again
        let connect_result = inner_connect_with(addr, bind_to, set_socket.clone()).await;
        if let Err(e) = connect_result.as_ref() {
            if matches!(e.etype(), BindError) {
                let mut new_bind_to = BindTo::default();
                new_bind_to.addr = bind_to.as_ref().and_then(|b| b.addr);
                // reset the port range
                new_bind_to.set_port_range(None).unwrap();
                return inner_connect_with(addr, Some(&new_bind_to), set_socket).await;
            }
        }
        connect_result
    } else {
        // not retryable
        inner_connect_with(addr, bind_to, set_socket).await
    }
}

async fn inner_connect_with<F: FnOnce(&TcpSocket) -> Result<()>>(
    addr: &SocketAddr,
    bind_to: Option<&BindTo>,
    set_socket: F,
) -> Result<TcpStream> {
    let socket = if addr.is_ipv4() {
        TcpSocket::new_v4()
    } else {
        TcpSocket::new_v6()
    }
    .or_err(SocketError, "failed to create socket")?;

    #[cfg(unix)]
    {
        ip_bind_addr_no_port(socket.as_raw_fd(), true).or_err(
            SocketError,
            "failed to set socket opts IP_BIND_ADDRESS_NO_PORT",
        )?;

        if let Some(bind_to) = bind_to {
            if let Some((low, high)) = bind_to.port_range() {
                ip_local_port_range(socket.as_raw_fd(), low, high)
                    .or_err(SocketError, "failed to set socket opts IP_LOCAL_PORT_RANGE")?;
            }

            if let Some(baddr) = bind_to.addr {
                socket
                    .bind(baddr)
                    .or_err_with(BindError, || format!("failed to bind to socket {}", baddr))?;
            }
        }
    }

    #[cfg(windows)]
    if let Some(bind_to) = bind_to {
        if let Some(baddr) = bind_to.addr {
            socket
                .bind(baddr)
                .or_err_with(BindError, || format!("failed to bind to socket {}", baddr))?;
        };
    };
    // TODO: add support for bind on other platforms

    set_socket(&socket)?;

    socket
        .connect(*addr)
        .await
        .map_err(|e| wrap_os_connect_error(e, format!("Fail to connect to {}", *addr)))
}

/// connect() to the given address while optionally binding to the specific source address.
///
/// `IP_BIND_ADDRESS_NO_PORT` is used
/// `IP_LOCAL_PORT_RANGE` is used if a port range is set on [`BindTo`].
pub async fn connect(addr: &SocketAddr, bind_to: Option<&BindTo>) -> Result<TcpStream> {
    connect_with(addr, bind_to, |_| Ok(())).await
}

/// connect() to the given Unix domain socket
#[cfg(unix)]
pub async fn connect_uds(path: &std::path::Path) -> Result<UnixStream> {
    UnixStream::connect(path)
        .await
        .map_err(|e| wrap_os_connect_error(e, format!("Fail to connect to {}", path.display())))
}

fn wrap_os_connect_error(e: std::io::Error, context: String) -> Box<Error> {
    match e.kind() {
        ErrorKind::ConnectionRefused => Error::because(ConnectRefused, context, e),
        ErrorKind::TimedOut => Error::because(ConnectTimedout, context, e),
        ErrorKind::AddrNotAvailable => Error::because(BindError, context, e),
        ErrorKind::PermissionDenied | ErrorKind::AddrInUse => {
            Error::because(InternalError, context, e)
        }
        _ => match e.raw_os_error() {
            Some(libc::ENETUNREACH | libc::EHOSTUNREACH) => {
                Error::because(ConnectNoRoute, context, e)
            }
            _ => Error::because(ConnectError, context, e),
        },
    }
}

/// The configuration for TCP keepalive
#[derive(Clone, Debug)]
pub struct TcpKeepalive {
    /// The time a connection needs to be idle before TCP begins sending out keep-alive probes.
    pub idle: Duration,
    /// The number of seconds between TCP keep-alive probes.
    pub interval: Duration,
    /// The maximum number of TCP keep-alive probes to send before giving up and killing the connection
    pub count: usize,
    /// the maximum amount of time in milliseconds that transmitted data may
    /// remain unacknowledged, or buffered data may remain untransmitted (due to
    /// zero window size) before TCP will forcibly close the corresponding
    /// connection and return ETIMEDOUT. If the value is specified as 0 (the
    /// default), TCP will use the system default.
    #[cfg(target_os = "linux")]
    pub user_timeout: Duration,
}

impl std::fmt::Display for TcpKeepalive {
    #[cfg(target_os = "linux")]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{:?}/{:?}/{}/{:?}",
            self.idle, self.interval, self.count, self.user_timeout
        )
    }
    #[cfg(not(target_os = "linux"))]
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{:?}/{:?}/{}", self.idle, self.interval, self.count)
    }
}

/// Apply the given TCP keepalive settings to the given connection
pub fn set_tcp_keepalive(stream: &TcpStream, ka: &TcpKeepalive) -> Result<()> {
    #[cfg(unix)]
    let raw = stream.as_raw_fd();
    #[cfg(windows)]
    let raw = stream.as_raw_socket();
    // TODO: check localhost or if keepalive is already set
    set_keepalive(raw, ka).or_err(ConnectError, "failed to set keepalive")
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_set_recv_buf() {
        use tokio::net::TcpSocket;
        let socket = TcpSocket::new_v4().unwrap();
        #[cfg(unix)]
        set_recv_buf(socket.as_raw_fd(), 102400).unwrap();
        #[cfg(windows)]
        set_recv_buf(socket.as_raw_socket(), 102400).unwrap();

        #[cfg(target_os = "linux")]
        {
            // kernel doubles whatever is set
            assert_eq!(get_recv_buf(socket.as_raw_fd()).unwrap(), 102400 * 2);
        }
    }

    #[cfg(target_os = "linux")]
    #[ignore] // this test requires the Linux system to have net.ipv4.tcp_fastopen set
    #[tokio::test]
    async fn test_set_fast_open() {
        use std::time::Instant;

        // connect once to make sure their is a SYN cookie to use for TFO
        connect_with(&"1.1.1.1:80".parse().unwrap(), None, |socket| {
            set_tcp_fastopen_connect(socket.as_raw_fd())
        })
        .await
        .unwrap();

        let start = Instant::now();
        connect_with(&"1.1.1.1:80".parse().unwrap(), None, |socket| {
            set_tcp_fastopen_connect(socket.as_raw_fd())
        })
        .await
        .unwrap();
        let connection_time = start.elapsed();

        // connect() return right away as the SYN goes out only when the first write() is called.
        assert!(connection_time.as_millis() < 4);
    }
}


================================================
FILE: pingora-core/src/protocols/l4/listener.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Listeners

use std::io;
#[cfg(unix)]
use std::os::unix::io::AsRawFd;
#[cfg(windows)]
use std::os::windows::io::AsRawSocket;
use tokio::net::TcpListener;
#[cfg(unix)]
use tokio::net::UnixListener;

use crate::protocols::digest::{GetSocketDigest, SocketDigest};
use crate::protocols::l4::stream::Stream;

/// The type for generic listener for both TCP and Unix domain socket
#[derive(Debug)]
pub enum Listener {
    Tcp(TcpListener),
    #[cfg(unix)]
    Unix(UnixListener),
}

impl From<TcpListener> for Listener {
    fn from(s: TcpListener) -> Self {
        Self::Tcp(s)
    }
}

#[cfg(unix)]
impl From<UnixListener> for Listener {
    fn from(s: UnixListener) -> Self {
        Self::Unix(s)
    }
}

#[cfg(unix)]
impl AsRawFd for Listener {
    fn as_raw_fd(&self) -> std::os::unix::io::RawFd {
        match &self {
            Self::Tcp(l) => l.as_raw_fd(),
            Self::Unix(l) => l.as_raw_fd(),
        }
    }
}

#[cfg(windows)]
impl AsRawSocket for Listener {
    fn as_raw_socket(&self) -> std::os::windows::io::RawSocket {
        match &self {
            Self::Tcp(l) => l.as_raw_socket(),
        }
    }
}

impl Listener {
    /// Accept a connection from the listening endpoint
    pub async fn accept(&self) -> io::Result<Stream> {
        match &self {
            Self::Tcp(l) => l.accept().await.map(|(stream, peer_addr)| {
                let mut s: Stream = stream.into();
                #[cfg(unix)]
                let digest = SocketDigest::from_raw_fd(s.as_raw_fd());
                #[cfg(windows)]
                let digest = SocketDigest::from_raw_socket(s.as_raw_socket());
                digest
                    .peer_addr
                    .set(Some(peer_addr.into()))
                    .expect("newly created OnceCell must be empty");
                s.set_socket_digest(digest);
                // TODO: if listening on a specific bind address, we could save
                // an extra syscall looking up the local_addr later if we can pass
                // and init it in the socket digest here
                s
            }),
            #[cfg(unix)]
            Self::Unix(l) => l.accept().await.map(|(stream, peer_addr)| {
                let mut s: Stream = stream.into();
                let digest = SocketDigest::from_raw_fd(s.as_raw_fd());
                // note: if unnamed/abstract UDS, it will be `None`
                // (see TryFrom<tokio::net::unix::SocketAddr>)
                let addr = peer_addr.try_into().ok();
                digest
                    .peer_addr
                    .set(addr)
                    .expect("newly created OnceCell must be empty");
                s.set_socket_digest(digest);
                s
            }),
        }
    }
}


================================================
FILE: pingora-core/src/protocols/l4/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Transport layer protocol implementation

pub mod ext;
pub mod listener;
pub mod socket;
pub mod stream;
pub mod virt;


================================================
FILE: pingora-core/src/protocols/l4/socket.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Generic socket type

use crate::{Error, OrErr};
use log::warn;
#[cfg(unix)]
use nix::sys::socket::{getpeername, getsockname, SockaddrStorage};
use std::cmp::Ordering;
use std::hash::{Hash, Hasher};
use std::net::SocketAddr as StdSockAddr;
#[cfg(unix)]
use std::os::unix::net::SocketAddr as StdUnixSockAddr;
#[cfg(unix)]
use tokio::net::unix::SocketAddr as TokioUnixSockAddr;

/// [`SocketAddr`] is a storage type that contains either an Internet (IP address)
/// socket address or a Unix domain socket address.
#[derive(Debug, Clone)]
pub enum SocketAddr {
    Inet(StdSockAddr),
    #[cfg(unix)]
    Unix(StdUnixSockAddr),
}

impl SocketAddr {
    /// Get a reference to the IP socket if it is one
    pub fn as_inet(&self) -> Option<&StdSockAddr> {
        if let SocketAddr::Inet(addr) = self {
            Some(addr)
        } else {
            None
        }
    }

    /// Get a reference to the Unix domain socket if it is one
    #[cfg(unix)]
    pub fn as_unix(&self) -> Option<&StdUnixSockAddr> {
        if let SocketAddr::Unix(addr) = self {
            Some(addr)
        } else {
            None
        }
    }

    /// Set the port if the address is an IP socket.
    pub fn set_port(&mut self, port: u16) {
        if let SocketAddr::Inet(addr) = self {
            addr.set_port(port)
        }
    }

    #[cfg(unix)]
    fn from_sockaddr_storage(sock: &SockaddrStorage) -> Option<SocketAddr> {
        if let Some(v4) = sock.as_sockaddr_in() {
            return Some(SocketAddr::Inet(StdSockAddr::V4(
                std::net::SocketAddrV4::new(v4.ip().into(), v4.port()),
            )));
        } else if let Some(v6) = sock.as_sockaddr_in6() {
            return Some(SocketAddr::Inet(StdSockAddr::V6(
                std::net::SocketAddrV6::new(v6.ip(), v6.port(), v6.flowinfo(), v6.scope_id()),
            )));
        }

        // TODO: don't set abstract / unnamed for now,
        // for parity with how we treat these types in TryFrom<TokioUnixSockAddr>
        Some(SocketAddr::Unix(
            sock.as_unix_addr()
                .map(|addr| addr.path().map(StdUnixSockAddr::from_pathname))??
                .ok()?,
        ))
    }

    #[cfg(unix)]
    pub fn from_raw_fd(fd: std::os::unix::io::RawFd, peer_addr: bool) -> Option<SocketAddr> {
        let sockaddr_storage = if peer_addr {
            getpeername(fd)
        } else {
            getsockname(fd)
        };
        match sockaddr_storage {
            Ok(sockaddr) => Self::from_sockaddr_storage(&sockaddr),
            // could be errors such as EBADF, i.e. fd is no longer a valid socket
            // fail open in this case
            Err(_e) => None,
        }
    }

    #[cfg(windows)]
    pub fn from_raw_socket(
        sock: std::os::windows::io::RawSocket,
        is_peer_addr: bool,
    ) -> Option<SocketAddr> {
        use crate::protocols::windows::{local_addr, peer_addr};
        if is_peer_addr {
            peer_addr(sock)
        } else {
            local_addr(sock)
        }
        .map(|s| s.into())
        .ok()
    }
}

impl std::fmt::Display for SocketAddr {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            SocketAddr::Inet(addr) => write!(f, "{addr}"),
            #[cfg(unix)]
            SocketAddr::Unix(addr) => {
                if let Some(path) = addr.as_pathname() {
                    write!(f, "{}", path.display())
                } else {
                    write!(f, "{addr:?}")
                }
            }
        }
    }
}

impl Hash for SocketAddr {
    fn hash<H: Hasher>(&self, state: &mut H) {
        match self {
            Self::Inet(sockaddr) => sockaddr.hash(state),
            #[cfg(unix)]
            Self::Unix(sockaddr) => {
                if let Some(path) = sockaddr.as_pathname() {
                    // use the underlying path as the hash
                    path.hash(state);
                } else {
                    // unnamed or abstract UDS
                    // abstract UDS name not yet exposed by std API
                    // panic for now, we can decide on the right way to hash them later
                    panic!("Unnamed and abstract UDS types not yet supported for hashing")
                }
            }
        }
    }
}

impl PartialEq for SocketAddr {
    fn eq(&self, other: &Self) -> bool {
        match self {
            Self::Inet(addr) => Some(addr) == other.as_inet(),
            #[cfg(unix)]
            Self::Unix(addr) => {
                let path = addr.as_pathname();
                // can only compare UDS with path, assume false on all unnamed UDS
                path.is_some() && path == other.as_unix().and_then(|addr| addr.as_pathname())
            }
        }
    }
}

impl PartialOrd for SocketAddr {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for SocketAddr {
    fn cmp(&self, other: &Self) -> Ordering {
        match self {
            Self::Inet(addr) => {
                if let Some(o) = other.as_inet() {
                    addr.cmp(o)
                } else {
                    // always make Inet < Unix "smallest for variants at the top"
                    Ordering::Less
                }
            }
            #[cfg(unix)]
            Self::Unix(addr) => {
                if let Some(o) = other.as_unix() {
                    // NOTE: unnamed UDS are consider the same
                    addr.as_pathname().cmp(&o.as_pathname())
                } else {
                    // always make Inet < Unix "smallest for variants at the top"
                    Ordering::Greater
                }
            }
        }
    }
}

impl Eq for SocketAddr {}

impl std::str::FromStr for SocketAddr {
    type Err = Box<Error>;

    // This is very basic parsing logic, it might treat invalid IP:PORT str as UDS path
    #[cfg(unix)]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s.starts_with("unix:") {
            // format unix:/tmp/server.socket
            let path = s.trim_start_matches("unix:");
            let uds_socket = StdUnixSockAddr::from_pathname(path)
                .or_err(crate::BindError, "invalid UDS path")?;
            Ok(SocketAddr::Unix(uds_socket))
        } else {
            match StdSockAddr::from_str(s) {
                Ok(addr) => Ok(SocketAddr::Inet(addr)),
                Err(_) => {
                    // Try to parse as UDS for backward compatibility
                    let uds_socket = StdUnixSockAddr::from_pathname(s)
                        .or_err(crate::BindError, "invalid UDS path")?;
                    warn!("Raw Unix domain socket path support will be deprecated, add 'unix:' prefix instead");
                    Ok(SocketAddr::Unix(uds_socket))
                }
            }
        }
    }

    #[cfg(windows)]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let addr = StdSockAddr::from_str(s).or_err(crate::BindError, "invalid socket addr")?;
        Ok(SocketAddr::Inet(addr))
    }
}

impl std::net::ToSocketAddrs for SocketAddr {
    type Iter = std::iter::Once<StdSockAddr>;

    // Error if UDS addr
    fn to_socket_addrs(&self) -> std::io::Result<Self::Iter> {
        if let Some(inet) = self.as_inet() {
            Ok(std::iter::once(*inet))
        } else {
            Err(std::io::Error::other(
                "UDS socket cannot be used as inet socket",
            ))
        }
    }
}

impl From<StdSockAddr> for SocketAddr {
    fn from(sockaddr: StdSockAddr) -> Self {
        SocketAddr::Inet(sockaddr)
    }
}

#[cfg(unix)]
impl From<StdUnixSockAddr> for SocketAddr {
    fn from(sockaddr: StdUnixSockAddr) -> Self {
        SocketAddr::Unix(sockaddr)
    }
}

// TODO: ideally mio/tokio will start using the std version of the unix `SocketAddr`
// so we can avoid a fallible conversion
// https://github.com/tokio-rs/mio/issues/1527
#[cfg(unix)]
impl TryFrom<TokioUnixSockAddr> for SocketAddr {
    type Error = String;

    fn try_from(value: TokioUnixSockAddr) -> Result<Self, Self::Error> {
        if let Some(Ok(addr)) = value.as_pathname().map(StdUnixSockAddr::from_pathname) {
            Ok(addr.into())
        } else {
            // may be unnamed/abstract UDS
            Err(format!("could not convert {value:?} to SocketAddr"))
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn parse_ip() {
        let ip: SocketAddr = "127.0.0.1:80".parse().unwrap();
        assert!(ip.as_inet().is_some());
    }

    #[cfg(unix)]
    #[test]
    fn parse_uds() {
        let uds: SocketAddr = "/tmp/my.sock".parse().unwrap();
        assert!(uds.as_unix().is_some());
    }

    #[cfg(unix)]
    #[test]
    fn parse_uds_with_prefix() {
        let uds: SocketAddr = "unix:/tmp/my.sock".parse().unwrap();
        assert!(uds.as_unix().is_some());
    }
}


================================================
FILE: pingora-core/src/protocols/l4/stream.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Transport layer connection

use async_trait::async_trait;
use futures::FutureExt;
use log::{debug, error};

use pingora_error::{ErrorType::*, OrErr, Result};
#[cfg(target_os = "linux")]
use std::io::IoSliceMut;
#[cfg(unix)]
use std::os::unix::io::AsRawFd;
#[cfg(windows)]
use std::os::windows::io::AsRawSocket;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::{Duration, Instant, SystemTime};
#[cfg(target_os = "linux")]
use tokio::io::Interest;
use tokio::io::{self, AsyncRead, AsyncWrite, AsyncWriteExt, BufStream, ReadBuf};
use tokio::net::TcpStream;
#[cfg(unix)]
use tokio::net::UnixStream;

use crate::protocols::l4::ext::{set_tcp_keepalive, TcpKeepalive};
use crate::protocols::l4::virt;
use crate::protocols::raw_connect::ProxyDigest;
use crate::protocols::{
    GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, SocketDigest, Ssl,
    TimingDigest, UniqueID, UniqueIDType,
};
use crate::upstreams::peer::Tracer;

#[derive(Debug)]
enum RawStream {
    Tcp(TcpStream),
    #[cfg(unix)]
    Unix(UnixStream),
    Virtual(virt::VirtualSocketStream),
}

impl AsyncRead for RawStream {
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self) {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_read(cx, buf),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_read(cx, buf),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_read(cx, buf),
            }
        }
    }
}

impl AsyncWrite for RawStream {
    fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<io::Result<usize>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self) {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write(cx, buf),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_write(cx, buf),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_write(cx, buf),
            }
        }
    }

    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self) {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_flush(cx),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_flush(cx),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_flush(cx),
            }
        }
    }

    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self) {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_shutdown(cx),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_shutdown(cx),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_shutdown(cx),
            }
        }
    }

    fn poll_write_vectored(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> Poll<io::Result<usize>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self) {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs),
            }
        }
    }

    fn is_write_vectored(&self) -> bool {
        match self {
            RawStream::Tcp(s) => s.is_write_vectored(),
            #[cfg(unix)]
            RawStream::Unix(s) => s.is_write_vectored(),
            RawStream::Virtual(s) => s.is_write_vectored(),
        }
    }
}

#[cfg(unix)]
impl AsRawFd for RawStream {
    fn as_raw_fd(&self) -> std::os::unix::io::RawFd {
        match self {
            RawStream::Tcp(s) => s.as_raw_fd(),
            RawStream::Unix(s) => s.as_raw_fd(),
            RawStream::Virtual(_) => -1, // Virtual stream does not have a real fd
        }
    }
}

#[cfg(windows)]
impl AsRawSocket for RawStream {
    fn as_raw_socket(&self) -> std::os::windows::io::RawSocket {
        match self {
            RawStream::Tcp(s) => s.as_raw_socket(),
            // Virtual stream does not have a real socket, return INVALID_SOCKET (!0)
            RawStream::Virtual(_) => !0,
        }
    }
}

#[derive(Debug)]
struct RawStreamWrapper {
    pub(crate) stream: RawStream,
    /// store the last rx timestamp of the stream.
    pub(crate) rx_ts: Option<SystemTime>,
    /// enable reading rx timestamp
    #[cfg(target_os = "linux")]
    pub(crate) enable_rx_ts: bool,
    #[cfg(target_os = "linux")]
    /// This can be reused across multiple recvmsg calls. The cmsg buffer may
    /// come from old sockets created by older version of pingora and so,
    /// this vector can only grow.
    reusable_cmsg_space: Vec<u8>,
}

impl RawStreamWrapper {
    pub fn new(stream: RawStream) -> Self {
        RawStreamWrapper {
            stream,
            rx_ts: None,
            #[cfg(target_os = "linux")]
            enable_rx_ts: false,
            #[cfg(target_os = "linux")]
            reusable_cmsg_space: nix::cmsg_space!(nix::sys::time::TimeSpec),
        }
    }

    #[cfg(target_os = "linux")]
    pub fn enable_rx_ts(&mut self, enable_rx_ts: bool) {
        self.enable_rx_ts = enable_rx_ts;
    }
}

impl AsyncRead for RawStreamWrapper {
    #[cfg(not(target_os = "linux"))]
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        // Safety: Basic enum pin projection
        unsafe {
            let rs_wrapper = Pin::get_unchecked_mut(self);
            match &mut rs_wrapper.stream {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_read(cx, buf),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_read(cx, buf),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_read(cx, buf),
            }
        }
    }

    #[cfg(target_os = "linux")]
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        use futures::ready;
        use nix::sys::socket::{recvmsg, ControlMessageOwned, MsgFlags, SockaddrStorage};

        // if we do not need rx timestamp, then use the standard path
        if !self.enable_rx_ts {
            // Safety: Basic enum pin projection
            unsafe {
                let rs_wrapper = Pin::get_unchecked_mut(self);
                match &mut rs_wrapper.stream {
                    RawStream::Tcp(s) => return Pin::new_unchecked(s).poll_read(cx, buf),
                    RawStream::Unix(s) => return Pin::new_unchecked(s).poll_read(cx, buf),
                    RawStream::Virtual(s) => return Pin::new_unchecked(s).poll_read(cx, buf),
                }
            }
        }

        // Safety: Basic pin projection to get mutable stream
        let rs_wrapper = unsafe { Pin::get_unchecked_mut(self) };
        match &mut rs_wrapper.stream {
            RawStream::Tcp(s) => {
                loop {
                    ready!(s.poll_read_ready(cx))?;
                    // Safety: maybe uninitialized bytes will only be passed to recvmsg
                    let b = unsafe {
                        &mut *(buf.unfilled_mut() as *mut [std::mem::MaybeUninit<u8>]
                            as *mut [u8])
                    };
                    let mut iov = [IoSliceMut::new(b)];
                    rs_wrapper.reusable_cmsg_space.clear();

                    match s.try_io(Interest::READABLE, || {
                        recvmsg::<SockaddrStorage>(
                            s.as_raw_fd(),
                            &mut iov,
                            Some(&mut rs_wrapper.reusable_cmsg_space),
                            MsgFlags::empty(),
                        )
                        .map_err(|errno| errno.into())
                    }) {
                        Ok(r) => {
                            if let Some(ControlMessageOwned::ScmTimestampsns(rtime)) = r
                                .cmsgs()
                                .find(|i| matches!(i, ControlMessageOwned::ScmTimestampsns(_)))
                            {
                                // The returned timestamp is a real (i.e. not monotonic) timestamp
                                // https://docs.kernel.org/networking/timestamping.html
                                rs_wrapper.rx_ts =
                                    SystemTime::UNIX_EPOCH.checked_add(rtime.system.into());
                            }
                            // Safety: We trust `recvmsg` to have filled up `r.bytes` bytes in the buffer.
                            unsafe {
                                buf.assume_init(r.bytes);
                            }
                            buf.advance(r.bytes);
                            return Poll::Ready(Ok(()));
                        }
                        Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => continue,
                        Err(e) => return Poll::Ready(Err(e)),
                    }
                }
            }
            // Unix RX timestamp only works with datagram for now, so we do not care about it
            RawStream::Unix(s) => unsafe { Pin::new_unchecked(s).poll_read(cx, buf) },
            RawStream::Virtual(s) => unsafe { Pin::new_unchecked(s).poll_read(cx, buf) },
        }
    }
}

impl AsyncWrite for RawStreamWrapper {
    fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<io::Result<usize>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self).stream {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write(cx, buf),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_write(cx, buf),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_write(cx, buf),
            }
        }
    }

    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self).stream {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_flush(cx),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_flush(cx),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_flush(cx),
            }
        }
    }

    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self).stream {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_shutdown(cx),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_shutdown(cx),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_shutdown(cx),
            }
        }
    }

    fn poll_write_vectored(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> Poll<io::Result<usize>> {
        // Safety: Basic enum pin projection
        unsafe {
            match &mut Pin::get_unchecked_mut(self).stream {
                RawStream::Tcp(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs),
                #[cfg(unix)]
                RawStream::Unix(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs),
                RawStream::Virtual(s) => Pin::new_unchecked(s).poll_write_vectored(cx, bufs),
            }
        }
    }

    fn is_write_vectored(&self) -> bool {
        self.stream.is_write_vectored()
    }
}

#[cfg(unix)]
impl AsRawFd for RawStreamWrapper {
    fn as_raw_fd(&self) -> std::os::unix::io::RawFd {
        self.stream.as_raw_fd()
    }
}

#[cfg(windows)]
impl AsRawSocket for RawStreamWrapper {
    fn as_raw_socket(&self) -> std::os::windows::io::RawSocket {
        self.stream.as_raw_socket()
    }
}

// Large read buffering helps reducing syscalls with little trade-off
// Ssl layer always does "small" reads in 16k (TLS record size) so L4 read buffer helps a lot.
const BUF_READ_SIZE: usize = 64 * 1024;
// Small write buf to match MSS. Too large write buf delays real time communication.
// This buffering effectively implements something similar to Nagle's algorithm.
// The benefit is that user space can control when to flush, where Nagle's can't be controlled.
// And userspace buffering reduce both syscalls and small packets.
const BUF_WRITE_SIZE: usize = 1460;

// NOTE: with writer buffering, users need to call flush() to make sure the data is actually
// sent. Otherwise data could be stuck in the buffer forever or get lost when stream is closed.

/// A concrete type for transport layer connection + extra fields for logging
#[derive(Debug)]
pub struct Stream {
    // Use `Option` to be able to swap to adjust the buffer size. Always safe to unwrap
    stream: Option<BufStream<RawStreamWrapper>>,
    // the data put back at the front of the read buffer, in order to replay the read
    rewind_read_buf: Vec<Vec<u8>>,
    buffer_write: bool,
    proxy_digest: Option<Arc<ProxyDigest>>,
    socket_digest: Option<Arc<SocketDigest>>,
    /// When this connection is established
    pub established_ts: SystemTime,
    /// The distributed tracing object for this stream
    pub tracer: Option<Tracer>,
    read_pending_time: AccumulatedDuration,
    write_pending_time: AccumulatedDuration,
    /// Last rx timestamp associated with the last recvmsg call.
    pub rx_ts: Option<SystemTime>,
}

impl Stream {
    fn stream(&self) -> &BufStream<RawStreamWrapper> {
        self.stream.as_ref().expect("stream should always be set")
    }

    fn stream_mut(&mut self) -> &mut BufStream<RawStreamWrapper> {
        self.stream.as_mut().expect("stream should always be set")
    }

    /// set TCP nodelay for this connection if `self` is TCP
    pub fn set_nodelay(&mut self) -> Result<()> {
        match &self.stream_mut().get_mut().stream {
            RawStream::Tcp(s) => {
                s.set_nodelay(true)
                    .or_err(ConnectError, "failed to set_nodelay")?;
            }
            RawStream::Virtual(s) => {
                s.set_socket_option(virt::VirtualSockOpt::NoDelay)
                    .or_err(ConnectError, "failed to set_nodelay on virtual socket")?;
            }
            _ => (),
        }
        Ok(())
    }

    /// set TCP keepalive settings for this connection if `self` is TCP
    pub fn set_keepalive(&mut self, ka: &TcpKeepalive) -> Result<()> {
        match &self.stream_mut().get_mut().stream {
            RawStream::Tcp(s) => {
                debug!("Setting tcp keepalive");
                set_tcp_keepalive(s, ka)?;
            }
            RawStream::Virtual(s) => {
                s.set_socket_option(virt::VirtualSockOpt::KeepAlive(ka.clone()))
                    .or_err(ConnectError, "failed to set_keepalive on virtual socket")?;
            }
            _ => (),
        }
        Ok(())
    }

    #[cfg(target_os = "linux")]
    pub fn set_rx_timestamp(&mut self) -> Result<()> {
        use nix::sys::socket::{setsockopt, sockopt, TimestampingFlag};

        if let RawStream::Tcp(s) = &self.stream_mut().get_mut().stream {
            let timestamp_options = TimestampingFlag::SOF_TIMESTAMPING_RX_SOFTWARE
                | TimestampingFlag::SOF_TIMESTAMPING_SOFTWARE;
            setsockopt(s.as_raw_fd(), sockopt::Timestamping, &timestamp_options)
                .or_err(InternalError, "failed to set SOF_TIMESTAMPING_RX_SOFTWARE")?;
            self.stream_mut().get_mut().enable_rx_ts(true);
        }

        Ok(())
    }

    #[cfg(not(target_os = "linux"))]
    pub fn set_rx_timestamp(&mut self) -> io::Result<()> {
        Ok(())
    }

    /// Put Some data back to the head of the stream to be read again
    pub(crate) fn rewind(&mut self, data: &[u8]) {
        if !data.is_empty() {
            self.rewind_read_buf.push(data.to_vec());
        }
    }

    /// Set the buffer of BufStream
    /// It is only set later because of the malloc overhead in critical accept() path
    pub(crate) fn set_buffer(&mut self) {
        use std::mem;
        // Since BufStream doesn't provide an API to adjust the buf directly,
        // we take the raw stream out of it and put it in a new BufStream with the size we want
        let stream = mem::take(&mut self.stream);
        let stream =
            stream.map(|s| BufStream::with_capacity(BUF_READ_SIZE, BUF_WRITE_SIZE, s.into_inner()));
        let _ = mem::replace(&mut self.stream, stream);
    }
}

impl From<TcpStream> for Stream {
    fn from(s: TcpStream) -> Self {
        Stream {
            stream: Some(BufStream::with_capacity(
                0,
                0,
                RawStreamWrapper::new(RawStream::Tcp(s)),
            )),
            rewind_read_buf: Vec::new(),
            buffer_write: true,
            established_ts: SystemTime::now(),
            proxy_digest: None,
            socket_digest: None,
            tracer: None,
            read_pending_time: AccumulatedDuration::new(),
            write_pending_time: AccumulatedDuration::new(),
            rx_ts: None,
        }
    }
}

impl From<virt::VirtualSocketStream> for Stream {
    fn from(s: virt::VirtualSocketStream) -> Self {
        Stream {
            stream: Some(BufStream::with_capacity(
                0,
                0,
                RawStreamWrapper::new(RawStream::Virtual(s)),
            )),
            rewind_read_buf: Vec::new(),
            buffer_write: true,
            established_ts: SystemTime::now(),
            proxy_digest: None,
            socket_digest: None,
            tracer: None,
            read_pending_time: AccumulatedDuration::new(),
            write_pending_time: AccumulatedDuration::new(),
            rx_ts: None,
        }
    }
}

#[cfg(unix)]
impl From<UnixStream> for Stream {
    fn from(s: UnixStream) -> Self {
        Stream {
            stream: Some(BufStream::with_capacity(
                0,
                0,
                RawStreamWrapper::new(RawStream::Unix(s)),
            )),
            rewind_read_buf: Vec::new(),
            buffer_write: true,
            established_ts: SystemTime::now(),
            proxy_digest: None,
            socket_digest: None,
            tracer: None,
            read_pending_time: AccumulatedDuration::new(),
            write_pending_time: AccumulatedDuration::new(),
            rx_ts: None,
        }
    }
}

#[cfg(unix)]
impl AsRawFd for Stream {
    fn as_raw_fd(&self) -> std::os::unix::io::RawFd {
        self.stream().get_ref().as_raw_fd()
    }
}

#[cfg(windows)]
impl AsRawSocket for Stream {
    fn as_raw_socket(&self) -> std::os::windows::io::RawSocket {
        self.stream().get_ref().as_raw_socket()
    }
}

#[cfg(unix)]
impl UniqueID for Stream {
    fn id(&self) -> UniqueIDType {
        self.as_raw_fd()
    }
}

#[cfg(windows)]
impl UniqueID for Stream {
    fn id(&self) -> usize {
        self.as_raw_socket() as usize
    }
}

impl Ssl for Stream {}

#[async_trait]
impl Peek for Stream {
    async fn try_peek(&mut self, buf: &mut [u8]) -> std::io::Result<bool> {
        use tokio::io::AsyncReadExt;
        self.read_exact(buf).await?;
        // rewind regardless of what is read
        self.rewind(buf);
        Ok(true)
    }
}

#[async_trait]
impl Shutdown for Stream {
    async fn shutdown(&mut self) {
        AsyncWriteExt::shutdown(self).await.unwrap_or_else(|e| {
            debug!("Failed to shutdown connection: {:?}", e);
        });
    }
}

impl GetTimingDigest for Stream {
    fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
        let mut digest = Vec::with_capacity(2); // expect to have both L4 stream and TLS layer
        digest.push(Some(TimingDigest {
            established_ts: self.established_ts,
        }));
        digest
    }

    fn get_read_pending_time(&self) -> Duration {
        self.read_pending_time.total
    }

    fn get_write_pending_time(&self) -> Duration {
        self.write_pending_time.total
    }
}

impl GetProxyDigest for Stream {
    fn get_proxy_digest(&self) -> Option<Arc<ProxyDigest>> {
        self.proxy_digest.clone()
    }

    fn set_proxy_digest(&mut self, digest: ProxyDigest) {
        self.proxy_digest = Some(Arc::new(digest));
    }
}

impl GetSocketDigest for Stream {
    fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
        self.socket_digest.clone()
    }

    fn set_socket_digest(&mut self, socket_digest: SocketDigest) {
        self.socket_digest = Some(Arc::new(socket_digest))
    }
}

impl Drop for Stream {
    fn drop(&mut self) {
        if let Some(t) = self.tracer.as_ref() {
            t.0.on_disconnected();
        }
        /* use nodelay/local_addr function to detect socket status */
        let ret = match &self.stream().get_ref().stream {
            RawStream::Tcp(s) => s.nodelay().err(),
            #[cfg(unix)]
            RawStream::Unix(s) => s.local_addr().err(),
            RawStream::Virtual(_) => {
                // TODO: should this do something?
                None
            }
        };
        if let Some(e) = ret {
            match e.kind() {
                tokio::io::ErrorKind::Other => {
                    if let Some(ecode) = e.raw_os_error() {
                        if ecode == 9 {
                            // Or we could panic here
                            error!("Crit: socket {:?} is being double closed", self.stream);
                        }
                    }
                }
                _ => {
                    debug!("Socket is already broken {:?}", e);
                }
            }
        } else {
            // try flush the write buffer. We use now_or_never() because
            // 1. Drop cannot be async
            // 2. write should usually be ready, unless the buf is full.
            let _ = self.flush().now_or_never();
        }
        debug!("Dropping socket {:?}", self.stream);
    }
}

impl AsyncRead for Stream {
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        let result = if !self.rewind_read_buf.is_empty() {
            let data_to_read = self.rewind_read_buf.pop().unwrap(); // safe
            let mut data_to_read = data_to_read.as_slice();
            let result = Pin::new(&mut data_to_read).poll_read(cx, buf);
            // return the remaining data back to the head of rewind_read_buf
            if !data_to_read.is_empty() {
                let remaining_buf = Vec::from(data_to_read);
                self.rewind_read_buf.push(remaining_buf);
            }
            result
        } else {
            Pin::new(&mut self.stream_mut()).poll_read(cx, buf)
        };
        self.read_pending_time.poll_time(&result);
        self.rx_ts = self.stream().get_ref().rx_ts;
        result
    }
}

impl AsyncWrite for Stream {
    fn poll_write(
        mut self: Pin<&mut Self>,
        cx: &mut Context,
        buf: &[u8],
    ) -> Poll<io::Result<usize>> {
        let result = if self.buffer_write {
            Pin::new(&mut self.stream_mut()).poll_write(cx, buf)
        } else {
            Pin::new(&mut self.stream_mut().get_mut()).poll_write(cx, buf)
        };
        self.write_pending_time.poll_write_time(&result, buf.len());
        result
    }

    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        let result = Pin::new(&mut self.stream_mut()).poll_flush(cx);
        self.write_pending_time.poll_time(&result);
        result
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        Pin::new(&mut self.stream_mut()).poll_shutdown(cx)
    }

    fn poll_write_vectored(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> Poll<io::Result<usize>> {
        let total_size = bufs.iter().fold(0, |acc, s| acc + s.len());

        let result = if self.buffer_write {
            Pin::new(&mut self.stream_mut()).poll_write_vectored(cx, bufs)
        } else {
            Pin::new(&mut self.stream_mut().get_mut()).poll_write_vectored(cx, bufs)
        };

        self.write_pending_time.poll_write_time(&result, total_size);
        result
    }

    fn is_write_vectored(&self) -> bool {
        if self.buffer_write {
            self.stream().is_write_vectored() // it is true
        } else {
            self.stream().get_ref().is_write_vectored()
        }
    }
}

pub mod async_write_vec {
    use bytes::Buf;
    use futures::ready;
    use std::future::Future;
    use std::io::IoSlice;
    use std::pin::Pin;
    use std::task::{Context, Poll};
    use tokio::io;
    use tokio::io::AsyncWrite;

    /*
        the missing write_buf https://github.com/tokio-rs/tokio/pull/3156#issuecomment-738207409
        https://github.com/tokio-rs/tokio/issues/2610
        In general vectored write is lost when accessing the trait object: Box<S: AsyncWrite>
    */

    #[must_use = "futures do nothing unless you `.await` or poll them"]
    pub struct WriteVec<'a, W, B> {
        writer: &'a mut W,
        buf: &'a mut B,
    }

    #[must_use = "futures do nothing unless you `.await` or poll them"]
    pub struct WriteVecAll<'a, W, B> {
        writer: &'a mut W,
        buf: &'a mut B,
    }

    pub trait AsyncWriteVec {
        fn poll_write_vec<B: Buf>(
            self: Pin<&mut Self>,
            _cx: &mut Context<'_>,
            _buf: &mut B,
        ) -> Poll<io::Result<usize>>;

        fn write_vec<'a, B>(&'a mut self, src: &'a mut B) -> WriteVec<'a, Self, B>
        where
            Self: Sized,
            B: Buf,
        {
            WriteVec {
                writer: self,
                buf: src,
            }
        }

        fn write_vec_all<'a, B>(&'a mut self, src: &'a mut B) -> WriteVecAll<'a, Self, B>
        where
            Self: Sized,
            B: Buf,
        {
            WriteVecAll {
                writer: self,
                buf: src,
            }
        }
    }

    impl<W, B> Future for WriteVec<'_, W, B>
    where
        W: AsyncWriteVec + Unpin,
        B: Buf,
    {
        type Output = io::Result<usize>;

        fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<io::Result<usize>> {
            let me = &mut *self;
            Pin::new(&mut *me.writer).poll_write_vec(ctx, me.buf)
        }
    }

    impl<W, B> Future for WriteVecAll<'_, W, B>
    where
        W: AsyncWriteVec + Unpin,
        B: Buf,
    {
        type Output = io::Result<()>;

        fn poll(mut self: Pin<&mut Self>, ctx: &mut Context<'_>) -> Poll<io::Result<()>> {
            let me = &mut *self;
            while me.buf.has_remaining() {
                let n = ready!(Pin::new(&mut *me.writer).poll_write_vec(ctx, me.buf))?;
                if n == 0 {
                    return Poll::Ready(Err(io::ErrorKind::WriteZero.into()));
                }
            }
            Poll::Ready(Ok(()))
        }
    }

    /* from https://github.com/tokio-rs/tokio/blob/master/tokio-util/src/lib.rs#L177 */
    impl<T> AsyncWriteVec for T
    where
        T: AsyncWrite,
    {
        fn poll_write_vec<B: Buf>(
            self: Pin<&mut Self>,
            ctx: &mut Context,
            buf: &mut B,
        ) -> Poll<io::Result<usize>> {
            const MAX_BUFS: usize = 64;

            if !buf.has_remaining() {
                return Poll::Ready(Ok(0));
            }

            let n = if self.is_write_vectored() {
                let mut slices = [IoSlice::new(&[]); MAX_BUFS];
                let cnt = buf.chunks_vectored(&mut slices);
                ready!(self.poll_write_vectored(ctx, &slices[..cnt]))?
            } else {
                ready!(self.poll_write(ctx, buf.chunk()))?
            };

            buf.advance(n);

            Poll::Ready(Ok(n))
        }
    }
}

pub use async_write_vec::AsyncWriteVec;

#[derive(Debug)]
struct AccumulatedDuration {
    total: Duration,
    last_start: Option<Instant>,
}

impl AccumulatedDuration {
    fn new() -> Self {
        AccumulatedDuration {
            total: Duration::ZERO,
            last_start: None,
        }
    }

    fn start(&mut self) {
        if self.last_start.is_none() {
            self.last_start = Some(Instant::now());
        }
    }

    fn stop(&mut self) {
        if let Some(start) = self.last_start.take() {
            self.total += start.elapsed();
        }
    }

    fn poll_write_time(&mut self, result: &Poll<io::Result<usize>>, buf_size: usize) {
        match result {
            Poll::Ready(Ok(n)) => {
                if *n == buf_size {
                    self.stop();
                } else {
                    // partial write
                    self.start();
                }
            }
            Poll::Ready(Err(_)) => {
                self.stop();
            }
            _ => self.start(),
        }
    }

    fn poll_time(&mut self, result: &Poll<io::Result<()>>) {
        match result {
            Poll::Ready(_) => {
                self.stop();
            }
            _ => self.start(),
        }
    }
}

#[cfg(test)]
#[cfg(target_os = "linux")]
mod tests {
    use super::*;
    use std::sync::Arc;
    use tokio::io::AsyncReadExt;
    use tokio::io::AsyncWriteExt;
    use tokio::net::TcpListener;
    use tokio::sync::Notify;

    #[cfg(target_os = "linux")]
    #[tokio::test]
    async fn test_rx_timestamp() {
        let message = "hello world".as_bytes();
        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        let notify = Arc::new(Notify::new());
        let notify2 = notify.clone();

        tokio::spawn(async move {
            let (mut stream, _) = listener.accept().await.unwrap();
            notify2.notified().await;
            stream.write_all(message).await.unwrap();
        });

        let mut stream: Stream = TcpStream::connect(addr).await.unwrap().into();
        stream.set_rx_timestamp().unwrap();
        // Receive the message
        // setsockopt for SO_TIMESTAMPING is asynchronous so sleep a little bit
        // to let kernel do the work
        std::thread::sleep(Duration::from_micros(100));
        notify.notify_one();

        let mut buffer = vec![0u8; message.len()];
        let n = stream.read(buffer.as_mut_slice()).await.unwrap();
        assert_eq!(n, message.len());
        assert!(stream.rx_ts.is_some());
    }

    #[cfg(target_os = "linux")]
    #[tokio::test]
    async fn test_rx_timestamp_standard_path() {
        let message = "hello world".as_bytes();
        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        let notify = Arc::new(Notify::new());
        let notify2 = notify.clone();

        tokio::spawn(async move {
            let (mut stream, _) = listener.accept().await.unwrap();
            notify2.notified().await;
            stream.write_all(message).await.unwrap();
        });

        let mut stream: Stream = TcpStream::connect(addr).await.unwrap().into();
        std::thread::sleep(Duration::from_micros(100));
        notify.notify_one();

        let mut buffer = vec![0u8; message.len()];
        let n = stream.read(buffer.as_mut_slice()).await.unwrap();
        assert_eq!(n, message.len());
        assert!(stream.rx_ts.is_none());
    }

    #[tokio::test]
    async fn test_stream_rewind() {
        let message = b"hello world";
        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        let notify = Arc::new(Notify::new());
        let notify2 = notify.clone();

        tokio::spawn(async move {
            let (mut stream, _) = listener.accept().await.unwrap();
            notify2.notified().await;
            stream.write_all(message).await.unwrap();
        });

        let mut stream: Stream = TcpStream::connect(addr).await.unwrap().into();

        let rewind_test = b"this is Sparta!";
        stream.rewind(rewind_test);

        // partially read rewind_test because of the buffer size limit
        let mut buffer = vec![0u8; message.len()];
        let n = stream.read(buffer.as_mut_slice()).await.unwrap();
        assert_eq!(n, message.len());
        assert_eq!(buffer, rewind_test[..message.len()]);

        // read the rest of rewind_test
        let n = stream.read(buffer.as_mut_slice()).await.unwrap();
        assert_eq!(n, rewind_test.len() - message.len());
        assert_eq!(buffer[..n], rewind_test[message.len()..]);

        // read the actual data
        notify.notify_one();
        let n = stream.read(buffer.as_mut_slice()).await.unwrap();
        assert_eq!(n, message.len());
        assert_eq!(buffer, message);
    }

    #[tokio::test]
    async fn test_stream_peek() {
        let message = b"hello world";
        dbg!("try peek");
        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        let notify = Arc::new(Notify::new());
        let notify2 = notify.clone();

        tokio::spawn(async move {
            let (mut stream, _) = listener.accept().await.unwrap();
            notify2.notified().await;
            stream.write_all(message).await.unwrap();
            drop(stream);
        });

        notify.notify_one();

        let mut stream: Stream = TcpStream::connect(addr).await.unwrap().into();
        let mut buffer = vec![0u8; 5];
        assert!(stream.try_peek(&mut buffer).await.unwrap());
        assert_eq!(buffer, message[0..5]);
        let mut buffer = vec![];
        stream.read_to_end(&mut buffer).await.unwrap();
        assert_eq!(buffer, message);
    }

    #[tokio::test]
    async fn test_stream_two_subsequent_peek_calls_before_read() {
        let message = b"abcdefghijklmn";

        let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
        let addr = listener.local_addr().unwrap();
        let notify = Arc::new(Notify::new());
        let notify2 = notify.clone();

        tokio::spawn(async move {
            let (mut stream, _) = listener.accept().await.unwrap();
            notify2.notified().await;
            stream.write_all(message).await.unwrap();
            drop(stream);
        });

        notify.notify_one();

        let mut stream: Stream = TcpStream::connect(addr).await.unwrap().into();

        // Peek 4 bytes
        let mut buffer = vec![0u8; 4];
        assert!(stream.try_peek(&mut buffer).await.unwrap());
        assert_eq!(buffer, message[0..4]);

        // Peek 2 bytes
        let mut buffer = vec![0u8; 2];
        assert!(stream.try_peek(&mut buffer).await.unwrap());
        assert_eq!(buffer, message[0..2]);

        // Read 1 byte: ['a']
        let mut buffer = vec![0u8; 1];
        stream.read_exact(&mut buffer).await.unwrap();
        assert_eq!(buffer, message[0..1]);

        // Read as many bytes as possible, return 1 byte ['b']
        //  from the first retry buffer chunk
        let mut buffer = vec![0u8; 100];
        let n = stream.read(&mut buffer).await.unwrap();
        assert_eq!(n, 1);
        assert_eq!(buffer[..n], message[1..2]);

        // Read the rest ['cdefghijklmn']
        let mut buffer = vec![];
        stream.read_to_end(&mut buffer).await.unwrap();
        assert_eq!(buffer, message[2..]);
    }
}


================================================
FILE: pingora-core/src/protocols/l4/virt.rs
================================================
//! Provides [`VirtualSocketStream`].

use std::{
    pin::Pin,
    task::{Context, Poll},
};

use tokio::io::{AsyncRead, AsyncWrite};

use super::ext::TcpKeepalive;

/// A limited set of socket options that can be set on a [`VirtualSocket`].
#[non_exhaustive]
#[derive(Debug, Clone)]
pub enum VirtualSockOpt {
    NoDelay,
    KeepAlive(TcpKeepalive),
}

/// A "virtual" socket that supports async read and write operations.
pub trait VirtualSocket: AsyncRead + AsyncWrite + Unpin + Send + Sync + std::fmt::Debug {
    /// Set a socket option.
    fn set_socket_option(&self, opt: VirtualSockOpt) -> std::io::Result<()>;
}

/// Wrapper around any type implementing  [`VirtualSocket`].
#[derive(Debug)]
pub struct VirtualSocketStream {
    pub(crate) socket: Box<dyn VirtualSocket>,
}

impl VirtualSocketStream {
    pub fn new(socket: Box<dyn VirtualSocket>) -> Self {
        Self { socket }
    }

    #[inline]
    pub fn set_socket_option(&self, opt: VirtualSockOpt) -> std::io::Result<()> {
        self.socket.set_socket_option(opt)
    }
}

impl AsyncRead for VirtualSocketStream {
    #[inline]
    fn poll_read(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut tokio::io::ReadBuf<'_>,
    ) -> Poll<std::io::Result<()>> {
        Pin::new(&mut *self.get_mut().socket).poll_read(cx, buf)
    }
}

impl AsyncWrite for VirtualSocketStream {
    #[inline]
    fn poll_write(
        self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &[u8],
    ) -> Poll<std::io::Result<usize>> {
        Pin::new(&mut *self.get_mut().socket).poll_write(cx, buf)
    }

    #[inline]
    fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
        Pin::new(&mut *self.get_mut().socket).poll_flush(cx)
    }

    #[inline]
    fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
        Pin::new(&mut *self.get_mut().socket).poll_shutdown(cx)
    }
}

#[cfg(test)]
mod tests {
    use std::sync::{Arc, Mutex};

    use tokio::io::{AsyncReadExt, AsyncWriteExt as _};

    use crate::protocols::l4::stream::Stream;

    use super::*;

    #[derive(Debug)]
    struct StaticVirtualSocket {
        content: Vec<u8>,
        read_pos: usize,
        write_buf: Arc<Mutex<Vec<u8>>>,
    }

    impl AsyncRead for StaticVirtualSocket {
        fn poll_read(
            mut self: Pin<&mut Self>,
            _cx: &mut Context<'_>,
            buf: &mut tokio::io::ReadBuf<'_>,
        ) -> Poll<std::io::Result<()>> {
            debug_assert!(self.read_pos <= self.content.len());

            let remaining = self.content.len() - self.read_pos;
            if remaining == 0 {
                return Poll::Ready(Ok(()));
            }

            let to_read = std::cmp::min(remaining, buf.remaining());
            buf.put_slice(&self.content[self.read_pos..self.read_pos + to_read]);
            self.read_pos += to_read;

            Poll::Ready(Ok(()))
        }
    }

    impl AsyncWrite for StaticVirtualSocket {
        fn poll_write(
            self: Pin<&mut Self>,
            _cx: &mut Context<'_>,
            buf: &[u8],
        ) -> Poll<std::io::Result<usize>> {
            // write to internal buffer
            let this = self.get_mut();
            this.write_buf.lock().unwrap().extend_from_slice(buf);
            Poll::Ready(Ok(buf.len()))
        }

        fn poll_flush(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
            Poll::Ready(Ok(()))
        }

        fn poll_shutdown(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
            Poll::Ready(Ok(()))
        }
    }

    impl VirtualSocket for StaticVirtualSocket {
        fn set_socket_option(&self, _opt: VirtualSockOpt) -> std::io::Result<()> {
            Ok(())
        }
    }

    /// Basic test that ensures reading and writing works with a virtual socket.
    //
    /// Mostly just ensures that construction works and the plumbing is correct.
    #[tokio::test]
    async fn test_stream_virtual() {
        let content = b"hello virtual world";
        let write_buf = Arc::new(Mutex::new(Vec::new()));
        let mut stream = Stream::from(VirtualSocketStream::new(Box::new(StaticVirtualSocket {
            content: content.to_vec(),
            read_pos: 0,
            write_buf: write_buf.clone(),
        })));

        let mut buf = Vec::new();
        let out = stream.read_to_end(&mut buf).await.unwrap();
        assert_eq!(out, content.len());
        assert_eq!(buf, content);

        stream.write_all(content).await.unwrap();
        assert_eq!(write_buf.lock().unwrap().as_slice(), content);
    }
}


================================================
FILE: pingora-core/src/protocols/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Abstractions and implementations for protocols including TCP, TLS and HTTP

mod digest;
pub mod http;
pub mod l4;
pub mod raw_connect;
pub mod tls;
#[cfg(windows)]
mod windows;

pub use digest::{
    Digest, GetProxyDigest, GetSocketDigest, GetTimingDigest, ProtoDigest, SocketDigest,
    TimingDigest,
};
pub use l4::ext::TcpKeepalive;
pub use tls::ALPN;

use async_trait::async_trait;
use std::fmt::Debug;
use std::net::{IpAddr, Ipv4Addr};
use std::sync::Arc;

#[cfg(unix)]
pub type UniqueIDType = i32;
#[cfg(windows)]
pub type UniqueIDType = usize;

/// Define how a protocol should shutdown its connection.
#[async_trait]
pub trait Shutdown {
    async fn shutdown(&mut self) -> ();
}

/// Define how a given session/connection identifies itself.
pub trait UniqueID {
    /// The ID returned should be unique among all existing connections of the same type.
    /// But ID can be recycled after a connection is shutdown.
    fn id(&self) -> UniqueIDType;
}

/// Interface to get TLS info
pub trait Ssl {
    /// Return the TLS info if the connection is over TLS
    fn get_ssl(&self) -> Option<&TlsRef> {
        None
    }

    /// Return the [`tls::SslDigest`] for logging
    fn get_ssl_digest(&self) -> Option<Arc<tls::SslDigest>> {
        None
    }

    /// Return selected ALPN if any
    fn selected_alpn_proto(&self) -> Option<ALPN> {
        None
    }
}

/// The ability peek data before consuming it
#[async_trait]
pub trait Peek {
    /// Peek data but not consuming it. This call should block until some data
    /// is sent.
    /// Return `false` if peeking is not supported/allowed.
    async fn try_peek(&mut self, _buf: &mut [u8]) -> std::io::Result<bool> {
        Ok(false)
    }
}

use std::any::Any;
use tokio::io::{AsyncRead, AsyncWrite};

/// The abstraction of transport layer IO
pub trait IO:
    AsyncRead
    + AsyncWrite
    + Shutdown
    + UniqueID
    + Ssl
    + GetTimingDigest
    + GetProxyDigest
    + GetSocketDigest
    + Peek
    + Unpin
    + Debug
    + Send
    + Sync
{
    /// helper to cast as the reference of the concrete type
    fn as_any(&self) -> &dyn Any;
    /// helper to cast back of the concrete type
    fn into_any(self: Box<Self>) -> Box<dyn Any>;
}

impl<
        T: AsyncRead
            + AsyncWrite
            + Shutdown
            + UniqueID
            + Ssl
            + GetTimingDigest
            + GetProxyDigest
            + GetSocketDigest
            + Peek
            + Unpin
            + Debug
            + Send
            + Sync,
    > IO for T
where
    T: 'static,
{
    fn as_any(&self) -> &dyn Any {
        self
    }
    fn into_any(self: Box<Self>) -> Box<dyn Any> {
        self
    }
}

/// The type of any established transport layer connection
pub type Stream = Box<dyn IO>;

// Implement IO trait for 3rd party types, mostly for testing
mod ext_io_impl {
    use super::*;
    use tokio_test::io::Mock;

    #[async_trait]
    impl Shutdown for Mock {
        async fn shutdown(&mut self) -> () {}
    }
    impl UniqueID for Mock {
        fn id(&self) -> UniqueIDType {
            0
        }
    }
    impl Ssl for Mock {}
    impl GetTimingDigest for Mock {
        fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
            vec![]
        }
    }
    impl GetProxyDigest for Mock {
        fn get_proxy_digest(&self) -> Option<Arc<raw_connect::ProxyDigest>> {
            None
        }
    }
    impl GetSocketDigest for Mock {
        fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
            None
        }
    }

    impl Peek for Mock {}

    use std::io::Cursor;

    #[async_trait]
    impl<T: Send> Shutdown for Cursor<T> {
        async fn shutdown(&mut self) -> () {}
    }
    impl<T> UniqueID for Cursor<T> {
        fn id(&self) -> UniqueIDType {
            0
        }
    }
    impl<T> Ssl for Cursor<T> {}
    impl<T> GetTimingDigest for Cursor<T> {
        fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
            vec![]
        }
    }
    impl<T> GetProxyDigest for Cursor<T> {
        fn get_proxy_digest(&self) -> Option<Arc<raw_connect::ProxyDigest>> {
            None
        }
    }
    impl<T> GetSocketDigest for Cursor<T> {
        fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
            None
        }
    }
    impl<T> Peek for Cursor<T> {}

    use tokio::io::DuplexStream;

    #[async_trait]
    impl Shutdown for DuplexStream {
        async fn shutdown(&mut self) -> () {}
    }
    impl UniqueID for DuplexStream {
        fn id(&self) -> UniqueIDType {
            0
        }
    }
    impl Ssl for DuplexStream {}
    impl GetTimingDigest for DuplexStream {
        fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
            vec![]
        }
    }
    impl GetProxyDigest for DuplexStream {
        fn get_proxy_digest(&self) -> Option<Arc<raw_connect::ProxyDigest>> {
            None
        }
    }
    impl GetSocketDigest for DuplexStream {
        fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
            None
        }
    }

    impl Peek for DuplexStream {}
}

#[cfg(unix)]
pub mod ext_test {
    use std::sync::Arc;

    use async_trait::async_trait;

    use super::{
        raw_connect, GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown,
        SocketDigest, Ssl, TimingDigest, UniqueID, UniqueIDType,
    };

    #[async_trait]
    impl Shutdown for tokio::net::UnixStream {
        async fn shutdown(&mut self) -> () {}
    }
    impl UniqueID for tokio::net::UnixStream {
        fn id(&self) -> UniqueIDType {
            0
        }
    }
    impl Ssl for tokio::net::UnixStream {}
    impl GetTimingDigest for tokio::net::UnixStream {
        fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
            vec![]
        }
    }
    impl GetProxyDigest for tokio::net::UnixStream {
        fn get_proxy_digest(&self) -> Option<Arc<raw_connect::ProxyDigest>> {
            None
        }
    }
    impl GetSocketDigest for tokio::net::UnixStream {
        fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
            None
        }
    }

    impl Peek for tokio::net::UnixStream {}
}

#[cfg(unix)]
pub(crate) trait ConnFdReusable {
    fn check_fd_match<V: AsRawFd>(&self, fd: V) -> bool;
}

#[cfg(windows)]
pub(crate) trait ConnSockReusable {
    fn check_sock_match<V: AsRawSocket>(&self, sock: V) -> bool;
}

use l4::socket::SocketAddr;
use log::{debug, error};
#[cfg(unix)]
use nix::sys::socket::{getpeername, SockaddrStorage, UnixAddr};
#[cfg(unix)]
use std::os::unix::prelude::AsRawFd;
#[cfg(windows)]
use std::os::windows::io::AsRawSocket;
use std::{net::SocketAddr as InetSocketAddr, path::Path};

use crate::protocols::tls::TlsRef;

#[cfg(unix)]
impl ConnFdReusable for SocketAddr {
    fn check_fd_match<V: AsRawFd>(&self, fd: V) -> bool {
        match self {
            SocketAddr::Inet(addr) => addr.check_fd_match(fd),
            SocketAddr::Unix(addr) => addr
                .as_pathname()
                .expect("non-pathname unix sockets not supported as peer")
                .check_fd_match(fd),
        }
    }
}

#[cfg(windows)]
impl ConnSockReusable for SocketAddr {
    fn check_sock_match<V: AsRawSocket>(&self, sock: V) -> bool {
        match self {
            SocketAddr::Inet(addr) => addr.check_sock_match(sock),
        }
    }
}

#[cfg(unix)]
impl ConnFdReusable for Path {
    fn check_fd_match<V: AsRawFd>(&self, fd: V) -> bool {
        let fd = fd.as_raw_fd();
        match getpeername::<UnixAddr>(fd) {
            Ok(peer) => match UnixAddr::new(self) {
                Ok(addr) => {
                    if addr == peer {
                        debug!("Unix FD to: {peer} is reusable");
                        true
                    } else {
                        error!("Crit: unix FD mismatch: fd: {fd:?}, peer: {peer}, addr: {addr}",);
                        false
                    }
                }
                Err(e) => {
                    error!("Bad addr: {self:?}, error: {e:?}");
                    false
                }
            },
            Err(e) => {
                error!("Idle unix connection is broken: {e:?}");
                false
            }
        }
    }
}

#[cfg(unix)]
impl ConnFdReusable for InetSocketAddr {
    fn check_fd_match<V: AsRawFd>(&self, fd: V) -> bool {
        let fd = fd.as_raw_fd();
        match getpeername::<SockaddrStorage>(fd) {
            Ok(peer) => {
                const ZERO: IpAddr = IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0));
                if self.ip() == ZERO {
                    // https://www.rfc-editor.org/rfc/rfc1122.html#section-3.2.1.3
                    // 0.0.0.0 should only be used as source IP not destination
                    // However in some systems this destination IP is mapped to 127.0.0.1.
                    // We just skip this check here to avoid false positive mismatch.
                    return true;
                }
                let addr = SockaddrStorage::from(*self);
                if addr == peer {
                    debug!("Inet FD to: {addr} is reusable");
                    true
                } else {
                    error!("Crit: FD mismatch: fd: {fd:?}, addr: {addr}, peer: {peer}",);
                    false
                }
            }
            Err(e) => {
                debug!("Idle connection is broken: {e:?}");
                false
            }
        }
    }
}

#[cfg(windows)]
impl ConnSockReusable for InetSocketAddr {
    fn check_sock_match<V: AsRawSocket>(&self, sock: V) -> bool {
        let sock = sock.as_raw_socket();
        match windows::peer_addr(sock) {
            Ok(peer) => {
                const ZERO: IpAddr = IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0));
                if self.ip() == ZERO {
                    // https://www.rfc-editor.org/rfc/rfc1122.html#section-3.2.1.3
                    // 0.0.0.0 should only be used as source IP not destination
                    // However in some systems this destination IP is mapped to 127.0.0.1.
                    // We just skip this check here to avoid false positive mismatch.
                    return true;
                }
                if self == &peer {
                    debug!("Inet FD to: {self} is reusable");
                    true
                } else {
                    error!("Crit: FD mismatch: fd: {sock:?}, addr: {self}, peer: {peer}",);
                    false
                }
            }
            Err(e) => {
                debug!("Idle connection is broken: {e:?}");
                false
            }
        }
    }
}


================================================
FILE: pingora-core/src/protocols/raw_connect.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! CONNECT protocol over http 1.1 via raw Unix domain socket
//!
//! This mod implements the most rudimentary CONNECT client over raw stream.
//! The idea is to yield raw stream once the CONNECT handshake is complete
//! so that the protocol encapsulated can use the stream directly.
//! This idea only works for CONNECT over HTTP 1.1 and localhost (or where the server is close by).

use std::any::Any;

use super::http::v1::client::HttpSession;
use super::http::v1::common::*;
use super::Stream;

use bytes::{BufMut, BytesMut};
use http::request::Parts as ReqHeader;
use http::Version;
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use pingora_http::ResponseHeader;
use tokio::io::AsyncWriteExt;

/// Try to establish a CONNECT proxy via the given `stream`.
///
/// `request_header` should include the necessary request headers for the CONNECT protocol.
///
/// When successful, a [`Stream`] will be returned which is the established CONNECT proxy connection.
pub async fn connect<P>(
    stream: Stream,
    request_header: &ReqHeader,
    peer: &P,
) -> Result<(Stream, ProxyDigest)>
where
    P: crate::upstreams::peer::Peer,
{
    let mut http = HttpSession::new(stream);

    // We write to stream directly because HttpSession doesn't write req header in auth form
    let to_wire = http_req_header_to_wire_auth_form(request_header);
    http.underlying_stream
        .write_all(to_wire.as_ref())
        .await
        .or_err(WriteError, "while writing request headers")?;
    http.underlying_stream
        .flush()
        .await
        .or_err(WriteError, "while flushing request headers")?;

    // TODO: set http.read_timeout
    let resp_header = http.read_resp_header_parts().await?;
    Ok((
        http.underlying_stream,
        validate_connect_response(resp_header, peer, request_header)?,
    ))
}

/// Generate the CONNECT header for the given destination
pub fn generate_connect_header<'a, H, S>(
    host: &str,
    port: u16,
    headers: H,
) -> Result<Box<ReqHeader>>
where
    S: AsRef<[u8]>,
    H: Iterator<Item = (S, &'a Vec<u8>)>,
{
    // TODO: valid that host doesn't have port

    let authority = if host.parse::<std::net::Ipv6Addr>().is_ok() {
        format!("[{host}]:{port}")
    } else {
        format!("{host}:{port}")
    };

    let req = http::request::Builder::new()
        .version(http::Version::HTTP_11)
        .method(http::method::Method::CONNECT)
        .uri(format!("https://{authority}/")) // scheme doesn't matter
        .header(http::header::HOST, &authority);

    let (mut req, _) = match req.body(()) {
        Ok(r) => r.into_parts(),
        Err(e) => {
            return Err(e).or_err(InvalidHTTPHeader, "Invalid CONNECT request");
        }
    };

    for (k, v) in headers {
        let header_name = http::header::HeaderName::from_bytes(k.as_ref())
            .or_err(InvalidHTTPHeader, "Invalid CONNECT request")?;
        let header_value = http::header::HeaderValue::from_bytes(v.as_slice())
            .or_err(InvalidHTTPHeader, "Invalid CONNECT request")?;
        req.headers.insert(header_name, header_value);
    }

    Ok(Box::new(req))
}

/// The information about the CONNECT proxy.
#[derive(Debug)]
pub struct ProxyDigest {
    /// The response header the proxy returns
    pub response: Box<ResponseHeader>,
    /// Optional arbitrary data.
    pub user_data: Option<Box<dyn Any + Send + Sync>>,
}

impl ProxyDigest {
    pub fn new(
        response: Box<ResponseHeader>,
        user_data: Option<Box<dyn Any + Send + Sync>>,
    ) -> Self {
        ProxyDigest {
            response,
            user_data,
        }
    }
}

/// The error returned when the CONNECT proxy fails to establish.
#[derive(Debug)]
pub struct ConnectProxyError {
    /// The response header the proxy returns
    pub response: Box<ResponseHeader>,
}

impl ConnectProxyError {
    pub fn boxed_new(response: Box<ResponseHeader>) -> Box<Self> {
        Box::new(ConnectProxyError { response })
    }
}

impl std::fmt::Display for ConnectProxyError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const PROXY_STATUS: &str = "proxy-status";

        let reason = self
            .response
            .headers
            .get(PROXY_STATUS)
            .and_then(|s| s.to_str().ok())
            .unwrap_or("missing proxy-status header value");
        write!(
            f,
            "Failed CONNECT Response: status {}, proxy-status {reason}",
            &self.response.status
        )
    }
}

impl std::error::Error for ConnectProxyError {}

#[inline]
fn http_req_header_to_wire_auth_form(req: &ReqHeader) -> BytesMut {
    let mut buf = BytesMut::with_capacity(512);

    // Request-Line
    let method = req.method.as_str().as_bytes();
    buf.put_slice(method);
    buf.put_u8(b' ');
    // NOTE: CONNECT doesn't need URI path so we just skip that
    if let Some(path) = req.uri.authority() {
        buf.put_slice(path.as_str().as_bytes());
    }
    buf.put_u8(b' ');

    let version = match req.version {
        Version::HTTP_09 => "HTTP/0.9",
        Version::HTTP_10 => "HTTP/1.0",
        Version::HTTP_11 => "HTTP/1.1",
        _ => "HTTP/0.9",
    };
    buf.put_slice(version.as_bytes());
    buf.put_slice(CRLF);

    // headers
    let headers = &req.headers;
    for (key, value) in headers.iter() {
        buf.put_slice(key.as_ref());
        buf.put_slice(HEADER_KV_DELIMITER);
        buf.put_slice(value.as_ref());
        buf.put_slice(CRLF);
    }

    buf.put_slice(CRLF);
    buf
}

#[inline]
fn validate_connect_response<P>(
    resp: Box<ResponseHeader>,
    peer: &P,
    req: &ReqHeader,
) -> Result<ProxyDigest>
where
    P: crate::upstreams::peer::Peer,
{
    if !resp.status.is_success() {
        return Error::e_because(
            ConnectProxyFailure,
            "None 2xx code",
            ConnectProxyError::boxed_new(resp),
        );
    }

    // Checking Content-Length and Transfer-Encoding is optional because we already ignore them.
    // We choose to do so because we want to be strict for internal use of CONNECT.
    // Ignore Content-Length header because our internal CONNECT server is coded to send it.
    if resp.headers.get(http::header::TRANSFER_ENCODING).is_some() {
        return Error::e_because(
            ConnectProxyFailure,
            "Invalid Transfer-Encoding presents",
            ConnectProxyError::boxed_new(resp),
        );
    }

    let user_data = peer
        .proxy_digest_user_data_hook()
        .and_then(|hook| hook(req, &resp));
    Ok(ProxyDigest::new(resp, user_data))
}

#[cfg(test)]
mod test_sync {
    use super::*;
    use std::collections::BTreeMap;
    use tokio_test::io::Builder;

    #[test]
    fn test_generate_connect_header() {
        let mut headers = BTreeMap::new();
        headers.insert(String::from("foo"), b"bar".to_vec());
        let req = generate_connect_header("pingora.org", 123, headers.iter()).unwrap();

        assert_eq!(req.method, http::method::Method::CONNECT);
        assert_eq!(req.uri.authority().unwrap(), "pingora.org:123");
        assert_eq!(req.headers.get("Host").unwrap(), "pingora.org:123");
        assert_eq!(req.headers.get("foo").unwrap(), "bar");
    }

    #[test]
    fn test_generate_connect_header_ipv6() {
        let mut headers = BTreeMap::new();
        headers.insert(String::from("foo"), b"bar".to_vec());
        let req = generate_connect_header("::1", 123, headers.iter()).unwrap();

        assert_eq!(req.method, http::method::Method::CONNECT);
        assert_eq!(req.uri.authority().unwrap(), "[::1]:123");
        assert_eq!(req.headers.get("Host").unwrap(), "[::1]:123");
        assert_eq!(req.headers.get("foo").unwrap(), "bar");
    }

    #[test]
    fn test_request_to_wire_auth_form() {
        let new_request = http::Request::builder()
            .method("CONNECT")
            .uri("https://pingora.org:123/")
            .header("Foo", "Bar")
            .body(())
            .unwrap();
        let (new_request, _) = new_request.into_parts();
        let wire = http_req_header_to_wire_auth_form(&new_request);
        assert_eq!(
            &b"CONNECT pingora.org:123 HTTP/1.1\r\nfoo: Bar\r\n\r\n"[..],
            &wire
        );
    }

    #[test]
    fn test_validate_connect_response() {
        use crate::upstreams::peer::BasicPeer;

        struct DummyUserData {
            some_num: i32,
            some_string: String,
        }

        let peer_no_data = BasicPeer::new("127.0.0.1:80");
        let mut peer_with_data = peer_no_data.clone();
        peer_with_data.options.proxy_digest_user_data_hook = Some(std::sync::Arc::new(
            |_req: &http::request::Parts, _resp: &pingora_http::ResponseHeader| {
                Some(Box::new(DummyUserData {
                    some_num: 42,
                    some_string: "test".to_string(),
                }) as Box<dyn std::any::Any + Send + Sync>)
            },
        ));

        let request = http::Request::builder()
            .method("CONNECT")
            .uri("https://example.com:443/")
            .body(())
            .unwrap();
        let (req_header, _) = request.into_parts();

        let resp = ResponseHeader::build(200, None).unwrap();
        let proxy_digest =
            validate_connect_response(Box::new(resp), &peer_with_data, &req_header).unwrap();
        assert!(proxy_digest.user_data.is_some());
        let user_data = proxy_digest
            .user_data
            .as_ref()
            .unwrap()
            .downcast_ref::<DummyUserData>()
            .unwrap();
        assert_eq!(user_data.some_num, 42);
        assert_eq!(user_data.some_string, "test");

        let resp = ResponseHeader::build(200, None).unwrap();
        let proxy_digest =
            validate_connect_response(Box::new(resp), &peer_no_data, &req_header).unwrap();
        assert!(proxy_digest.user_data.is_none());

        let resp = ResponseHeader::build(404, None).unwrap();
        assert!(validate_connect_response(Box::new(resp), &peer_with_data, &req_header).is_err());

        let mut resp = ResponseHeader::build(200, None).unwrap();
        resp.append_header("content-length", 0).unwrap();
        assert!(validate_connect_response(Box::new(resp), &peer_no_data, &req_header).is_ok());

        let mut resp = ResponseHeader::build(200, None).unwrap();
        resp.append_header("transfer-encoding", 0).unwrap();
        assert!(validate_connect_response(Box::new(resp), &peer_no_data, &req_header).is_err());
    }

    #[tokio::test]
    async fn test_connect_write_request() {
        use crate::upstreams::peer::BasicPeer;

        let wire = b"CONNECT pingora.org:123 HTTP/1.1\r\nhost: pingora.org:123\r\n\r\n";
        let mock_io = Box::new(Builder::new().write(wire).build());

        let headers: BTreeMap<String, Vec<u8>> = BTreeMap::new();
        let req = generate_connect_header("pingora.org", 123, headers.iter()).unwrap();
        let peer = BasicPeer::new("127.0.0.1:123");
        // ConnectionClosed
        assert!(connect(mock_io, &req, &peer).await.is_err());

        let to_wire = b"CONNECT pingora.org:123 HTTP/1.1\r\nhost: pingora.org:123\r\n\r\n";
        let from_wire = b"HTTP/1.1 200 OK\r\n\r\n";
        let mock_io = Box::new(Builder::new().write(to_wire).read(from_wire).build());

        let req = generate_connect_header("pingora.org", 123, headers.iter()).unwrap();
        let result = connect(mock_io, &req, &peer).await;
        assert!(result.is_ok());
    }
}


================================================
FILE: pingora-core/src/protocols/tls/boringssl_openssl/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! TLS client specific implementation

use crate::protocols::raw_connect::ProxyDigest;
use crate::protocols::tls::SslStream;
use crate::protocols::{
    GetProxyDigest, GetSocketDigest, GetTimingDigest, SocketDigest, TimingDigest, IO,
};
use crate::tls::{ssl, ssl::ConnectConfiguration, ssl::SslRef, ssl_sys::X509_V_ERR_INVALID_CALL};

use pingora_error::{Error, ErrorType::*, OrErr, Result};
use std::any::Any;
use std::sync::Arc;
use std::time::Duration;

/// Perform the TLS handshake for the given connection with the given configuration
pub async fn handshake<S: IO>(
    conn_config: ConnectConfiguration,
    domain: &str,
    io: S,
    complete_hook: Option<Arc<dyn Fn(&SslRef) -> Option<Arc<dyn Any + Send + Sync>> + Send + Sync>>,
) -> Result<SslStream<S>> {
    let ssl = conn_config
        .into_ssl(domain)
        .explain_err(TLSHandshakeFailure, |e| format!("ssl config error: {e}"))?;
    let mut stream = SslStream::new(ssl, io)
        .explain_err(TLSHandshakeFailure, |e| format!("ssl stream error: {e}"))?;
    let handshake_result = stream.connect().await;
    match handshake_result {
        Ok(()) => {
            if let Some(hook) = complete_hook {
                if let Some(extension) = hook(stream.ssl()) {
                    if let Some(digest_mut) = stream.ssl_digest_mut() {
                        digest_mut.extension.set(extension);
                    }
                }
            }
            Ok(stream)
        }
        Err(e) => {
            let context = format!("TLS connect() failed: {e}, SNI: {domain}");
            match e.code() {
                ssl::ErrorCode::SSL => {
                    // Unify the return type of `verify_result` for openssl
                    #[cfg(not(feature = "boringssl"))]
                    fn verify_result<S>(stream: SslStream<S>) -> Result<(), i32> {
                        match stream.ssl().verify_result().as_raw() {
                            crate::tls::ssl_sys::X509_V_OK => Ok(()),
                            e => Err(e),
                        }
                    }

                    // Unify the return type of `verify_result` for boringssl
                    #[cfg(feature = "boringssl")]
                    fn verify_result<S>(stream: SslStream<S>) -> Result<(), i32> {
                        stream.ssl().verify_result().map_err(|e| e.as_raw())
                    }

                    match verify_result(stream) {
                        Ok(()) => Error::e_explain(TLSHandshakeFailure, context),
                        // X509_V_ERR_INVALID_CALL in case verify result was never set
                        Err(X509_V_ERR_INVALID_CALL) => {
                            Error::e_explain(TLSHandshakeFailure, context)
                        }
                        _ => Error::e_explain(InvalidCert, context),
                    }
                }
                /* likely network error, but still mark as TLS error */
                _ => Error::e_explain(TLSHandshakeFailure, context),
            }
        }
    }
}

impl<S> GetTimingDigest for SslStream<S>
where
    S: GetTimingDigest,
{
    fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
        let mut ts_vec = self.get_ref().get_timing_digest();
        ts_vec.push(Some(self.timing.clone()));
        ts_vec
    }
    fn get_read_pending_time(&self) -> Duration {
        self.get_ref().get_read_pending_time()
    }

    fn get_write_pending_time(&self) -> Duration {
        self.get_ref().get_write_pending_time()
    }
}

impl<S> GetProxyDigest for SslStream<S>
where
    S: GetProxyDigest,
{
    fn get_proxy_digest(&self) -> Option<Arc<ProxyDigest>> {
        self.get_ref().get_proxy_digest()
    }
}

impl<S> GetSocketDigest for SslStream<S>
where
    S: GetSocketDigest,
{
    fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
        self.get_ref().get_socket_digest()
    }
    fn set_socket_digest(&mut self, socket_digest: SocketDigest) {
        self.get_mut().set_socket_digest(socket_digest)
    }
}


================================================
FILE: pingora-core/src/protocols/tls/boringssl_openssl/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod client;
pub mod server;
mod stream;

#[cfg(feature = "boringssl")]
use pingora_boringssl as ssl_lib;

#[cfg(feature = "openssl")]
use pingora_openssl as ssl_lib;

use ssl_lib::{ssl::SslRef, x509::X509};
pub use stream::*;

pub type TlsRef = SslRef;
pub type CaType = Box<[X509]>;


================================================
FILE: pingora-core/src/protocols/tls/boringssl_openssl/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! TLS server specific implementation

use crate::listeners::TlsAcceptCallbacks;
use crate::protocols::tls::SslStream;
use crate::protocols::{Shutdown, IO};
use crate::tls::ext;
use crate::tls::ext::ssl_from_acceptor;
use crate::tls::ssl;
use crate::tls::ssl::SslAcceptor;

use async_trait::async_trait;
use log::warn;
use pingora_error::{ErrorType::*, OrErr, Result};
use std::pin::Pin;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};

/// Prepare a TLS stream for handshake
pub fn prepare_tls_stream<S: IO>(ssl_acceptor: &SslAcceptor, io: S) -> Result<SslStream<S>> {
    let ssl = ssl_from_acceptor(ssl_acceptor)
        .explain_err(TLSHandshakeFailure, |e| format!("ssl_acceptor error: {e}"))?;
    SslStream::new(ssl, io).explain_err(TLSHandshakeFailure, |e| format!("ssl stream error: {e}"))
}

/// Perform TLS handshake for the given connection with the given configuration
pub async fn handshake<S: IO>(ssl_acceptor: &SslAcceptor, io: S) -> Result<SslStream<S>> {
    let mut stream = prepare_tls_stream(ssl_acceptor, io)?;
    stream
        .accept()
        .await
        .explain_err(TLSHandshakeFailure, |e| format!("TLS accept() failed: {e}"))?;
    Ok(stream)
}

/// Perform TLS handshake for the given connection with the given configuration and callbacks
pub async fn handshake_with_callback<S: IO>(
    ssl_acceptor: &SslAcceptor,
    io: S,
    callbacks: &TlsAcceptCallbacks,
) -> Result<SslStream<S>> {
    let mut tls_stream = prepare_tls_stream(ssl_acceptor, io)?;
    let done = Pin::new(&mut tls_stream)
        .start_accept()
        .await
        .explain_err(TLSHandshakeFailure, |e| format!("TLS accept() failed: {e}"))?;
    if !done {
        // safety: we do hold a mut ref of tls_stream
        let ssl_mut = unsafe { ext::ssl_mut(tls_stream.ssl()) };
        callbacks.certificate_callback(ssl_mut).await;
        Pin::new(&mut tls_stream)
            .resume_accept()
            .await
            .explain_err(TLSHandshakeFailure, |e| format!("TLS accept() failed: {e}"))?;
    }
    {
        let ssl = tls_stream.ssl();
        if let Some(extension) = callbacks.handshake_complete_callback(ssl).await {
            if let Some(digest_mut) = tls_stream.ssl_digest_mut() {
                digest_mut.extension.set(extension);
            }
        }
    }
    Ok(tls_stream)
}

#[async_trait]
impl<S> Shutdown for SslStream<S>
where
    S: AsyncRead + AsyncWrite + Sync + Unpin + Send,
{
    async fn shutdown(&mut self) {
        match <Self as AsyncWriteExt>::shutdown(self).await {
            Ok(()) => {}
            Err(e) => {
                warn!("TLS shutdown failed, {e}");
            }
        }
    }
}

/// Resumable TLS server side handshake.
#[async_trait]
pub trait ResumableAccept {
    /// Start a resumable TLS accept handshake.
    ///
    /// * `Ok(true)` when the handshake is finished
    /// * `Ok(false)`` when the handshake is paused midway
    ///
    /// For now, the accept will only pause when a certificate is needed.
    async fn start_accept(self: Pin<&mut Self>) -> Result<bool, ssl::Error>;

    /// Continue the TLS handshake
    ///
    /// This function should be called after the certificate is provided.
    async fn resume_accept(self: Pin<&mut Self>) -> Result<(), ssl::Error>;
}

#[async_trait]
impl<S: AsyncRead + AsyncWrite + Send + Unpin> ResumableAccept for SslStream<S> {
    async fn start_accept(mut self: Pin<&mut Self>) -> Result<bool, ssl::Error> {
        // safety: &mut self
        let ssl_mut = unsafe { ext::ssl_mut(self.ssl()) };
        ext::suspend_when_need_ssl_cert(ssl_mut);
        let res = self.accept().await;

        match res {
            Ok(()) => Ok(true),
            Err(e) => {
                if ext::is_suspended_for_cert(&e) {
                    Ok(false)
                } else {
                    Err(e)
                }
            }
        }
    }

    async fn resume_accept(mut self: Pin<&mut Self>) -> Result<(), ssl::Error> {
        // safety: &mut ssl
        let ssl_mut = unsafe { ext::ssl_mut(self.ssl()) };
        ext::unblock_ssl_cert(ssl_mut);
        self.accept().await
    }
}

#[cfg(test)]
mod tests {
    use super::handshake_with_callback;

    use crate::listeners::{TlsAccept, TlsAcceptCallbacks};
    use crate::protocols::tls::SslStream;
    use crate::protocols::tls::TlsRef;
    use crate::tls::ext;
    use crate::tls::ssl;

    use async_trait::async_trait;
    use std::pin::Pin;
    use std::sync::Arc;
    use tokio::io::DuplexStream;

    async fn client_task(client: DuplexStream) {
        use tokio::io::AsyncReadExt;
        let ssl_context = ssl::SslContext::builder(ssl::SslMethod::tls())
            .unwrap()
            .build();
        let mut ssl = ssl::Ssl::new(&ssl_context).unwrap();
        ssl.set_hostname("pingora.org").unwrap();
        ssl.set_verify(ssl::SslVerifyMode::NONE); // we don have a valid cert
        let mut stream = SslStream::new(ssl, client).unwrap();
        Pin::new(&mut stream).connect().await.unwrap();
        let mut buf = [0; 1];
        let _ = stream.read(&mut buf).await;
    }

    #[tokio::test]
    #[cfg(feature = "any_tls")]
    async fn test_async_cert() {
        let acceptor = ssl::SslAcceptor::mozilla_intermediate_v5(ssl::SslMethod::tls())
            .unwrap()
            .build();

        struct Callback;
        #[async_trait]
        impl TlsAccept for Callback {
            async fn certificate_callback(&self, ssl: &mut TlsRef) -> () {
                assert_eq!(
                    ssl.servername(ssl::NameType::HOST_NAME).unwrap(),
                    "pingora.org"
                );
                let cert = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
                let key = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));

                let cert_bytes = std::fs::read(cert).unwrap();
                let cert = crate::tls::x509::X509::from_pem(&cert_bytes).unwrap();

                let key_bytes = std::fs::read(key).unwrap();
                let key = crate::tls::pkey::PKey::private_key_from_pem(&key_bytes).unwrap();
                ext::ssl_use_certificate(ssl, &cert).unwrap();
                ext::ssl_use_private_key(ssl, &key).unwrap();
            }
        }

        let cb: TlsAcceptCallbacks = Box::new(Callback);

        let (client, server) = tokio::io::duplex(1024);

        tokio::spawn(client_task(client));

        handshake_with_callback(&acceptor, server, &cb)
            .await
            .unwrap();
    }

    #[tokio::test]
    #[cfg(feature = "openssl_derived")]
    async fn test_handshake_complete_callback() {
        use crate::tls::ssl::SslFiletype;

        let cert = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
        let key = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));

        let acceptor = {
            let mut builder =
                ssl::SslAcceptor::mozilla_intermediate_v5(ssl::SslMethod::tls()).unwrap();
            builder.set_certificate_chain_file(cert).unwrap();
            builder.set_private_key_file(key, SslFiletype::PEM).unwrap();
            builder.build()
        };

        struct Sni(String);
        struct Callback;
        #[async_trait]
        impl TlsAccept for Callback {
            async fn handshake_complete_callback(
                &self,
                ssl: &TlsRef,
            ) -> Option<Arc<dyn std::any::Any + Send + Sync>> {
                let sni = ssl.servername(ssl::NameType::HOST_NAME)?.to_string();
                Some(Arc::new(Sni(sni)))
            }
        }

        let cb: TlsAcceptCallbacks = Box::new(Callback);

        let (client, server) = tokio::io::duplex(1024);

        tokio::spawn(client_task(client));

        let stream = handshake_with_callback(&acceptor, server, &cb)
            .await
            .unwrap();
        let ssl_digest = stream.ssl_digest().unwrap();
        let sni = ssl_digest.extension.get::<Sni>().unwrap();
        assert_eq!(sni.0, "pingora.org");
    }
}


================================================
FILE: pingora-core/src/protocols/tls/boringssl_openssl/stream.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::protocols::digest::TimingDigest;
use crate::protocols::tls::{SslDigest, ALPN};
use crate::protocols::{Peek, Ssl, UniqueID, UniqueIDType};
use crate::tls::{self, ssl, tokio_ssl::SslStream as InnerSsl};
use crate::utils::tls::{get_organization, get_serial};
use log::warn;
use pingora_error::{ErrorType::*, OrErr, Result};
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::SystemTime;
use tokio::io::{self, AsyncRead, AsyncWrite, ReadBuf};

#[cfg(feature = "boringssl")]
use pingora_boringssl as ssl_lib;

#[cfg(feature = "openssl")]
use pingora_openssl as ssl_lib;

use ssl_lib::{hash::MessageDigest, ssl::SslRef};

/// The TLS connection
#[derive(Debug)]
pub struct SslStream<T> {
    ssl: InnerSsl<T>,
    digest: Option<Arc<SslDigest>>,
    pub(super) timing: TimingDigest,
}

impl<T> SslStream<T>
where
    T: AsyncRead + AsyncWrite + std::marker::Unpin,
{
    /// Create a new TLS connection from the given `stream`
    ///
    /// The caller needs to perform [`Self::connect()`] or [`Self::accept()`] to perform TLS
    /// handshake after.
    pub fn new(ssl: ssl::Ssl, stream: T) -> Result<Self> {
        let ssl = InnerSsl::new(ssl, stream)
            .explain_err(TLSHandshakeFailure, |e| format!("ssl stream error: {e}"))?;

        Ok(SslStream {
            ssl,
            digest: None,
            timing: Default::default(),
        })
    }

    /// Connect to the remote TLS server as a client
    pub async fn connect(&mut self) -> Result<(), ssl::Error> {
        Self::clear_error();
        Pin::new(&mut self.ssl).connect().await?;
        self.timing.established_ts = SystemTime::now();
        self.digest = Some(Arc::new(SslDigest::from_ssl(self.ssl())));
        Ok(())
    }

    /// Finish the TLS handshake from client as a server
    pub async fn accept(&mut self) -> Result<(), ssl::Error> {
        Self::clear_error();
        Pin::new(&mut self.ssl).accept().await?;
        self.timing.established_ts = SystemTime::now();
        self.digest = Some(Arc::new(SslDigest::from_ssl(self.ssl())));
        Ok(())
    }

    #[inline]
    fn clear_error() {
        let errs = tls::error::ErrorStack::get();
        if !errs.errors().is_empty() {
            warn!("Clearing dirty TLS error stack: {}", errs);
        }
    }
}

impl<T> SslStream<T> {
    pub fn ssl_digest(&self) -> Option<Arc<SslDigest>> {
        self.digest.clone()
    }

    /// Attempts to obtain a mutable reference to the SslDigest.
    /// This method returns `None` if the SslDigest is currently held by other references.
    pub(crate) fn ssl_digest_mut(&mut self) -> Option<&mut SslDigest> {
        Arc::get_mut(self.digest.as_mut()?)
    }
}

use std::ops::{Deref, DerefMut};

impl<T> Deref for SslStream<T> {
    type Target = InnerSsl<T>;

    fn deref(&self) -> &Self::Target {
        &self.ssl
    }
}

impl<T> DerefMut for SslStream<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.ssl
    }
}

impl<T> AsyncRead for SslStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<io::Result<()>> {
        Self::clear_error();
        Pin::new(&mut self.ssl).poll_read(cx, buf)
    }
}

impl<T> AsyncWrite for SslStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_write(
        mut self: Pin<&mut Self>,
        cx: &mut Context,
        buf: &[u8],
    ) -> Poll<io::Result<usize>> {
        Self::clear_error();
        Pin::new(&mut self.ssl).poll_write(cx, buf)
    }

    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        Self::clear_error();
        Pin::new(&mut self.ssl).poll_flush(cx)
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
        Self::clear_error();
        Pin::new(&mut self.ssl).poll_shutdown(cx)
    }

    fn poll_write_vectored(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> Poll<io::Result<usize>> {
        Self::clear_error();
        Pin::new(&mut self.ssl).poll_write_vectored(cx, bufs)
    }

    fn is_write_vectored(&self) -> bool {
        true
    }
}

impl<T> UniqueID for SslStream<T>
where
    T: UniqueID,
{
    fn id(&self) -> UniqueIDType {
        self.ssl.get_ref().id()
    }
}

impl<T> Ssl for SslStream<T> {
    fn get_ssl(&self) -> Option<&ssl::SslRef> {
        Some(self.ssl())
    }

    fn get_ssl_digest(&self) -> Option<Arc<SslDigest>> {
        self.ssl_digest()
    }

    /// Return selected ALPN if any
    fn selected_alpn_proto(&self) -> Option<ALPN> {
        let ssl = self.get_ssl()?;
        ALPN::from_wire_selected(ssl.selected_alpn_protocol()?)
    }
}

impl SslDigest {
    pub fn from_ssl(ssl: &SslRef) -> Self {
        let cipher = match ssl.current_cipher() {
            Some(c) => c.name(),
            None => "",
        };

        let (cert_digest, org, sn) = match ssl.peer_certificate() {
            Some(cert) => {
                let cert_digest = match cert.digest(MessageDigest::sha256()) {
                    Ok(c) => c.as_ref().to_vec(),
                    Err(_) => Vec::new(),
                };
                (cert_digest, get_organization(&cert), get_serial(&cert).ok())
            }
            None => (Vec::new(), None, None),
        };

        SslDigest::new(cipher, ssl.version_str(), org, sn, cert_digest)
    }
}

// TODO: implement Peek if needed
impl<T> Peek for SslStream<T> {}


================================================
FILE: pingora-core/src/protocols/tls/digest.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! TLS information from the TLS connection

use std::any::Any;
use std::borrow::Cow;
use std::sync::Arc;

/// The TLS connection information
#[derive(Clone, Debug)]
pub struct SslDigest {
    /// The cipher used
    pub cipher: Cow<'static, str>,
    /// The TLS version of this connection
    pub version: Cow<'static, str>,
    /// The organization of the peer's certificate
    pub organization: Option<String>,
    /// The serial number of the peer's certificate
    pub serial_number: Option<String>,
    /// The digest of the peer's certificate
    pub cert_digest: Vec<u8>,
    /// The user-defined TLS data
    pub extension: SslDigestExtension,
}

impl SslDigest {
    /// Create a new SslDigest
    pub fn new<S>(
        cipher: S,
        version: S,
        organization: Option<String>,
        serial_number: Option<String>,
        cert_digest: Vec<u8>,
    ) -> Self
    where
        S: Into<Cow<'static, str>>,
    {
        SslDigest {
            cipher: cipher.into(),
            version: version.into(),
            organization,
            serial_number,
            cert_digest,
            extension: SslDigestExtension::default(),
        }
    }
}

/// The user-defined TLS data
#[derive(Clone, Debug, Default)]
pub struct SslDigestExtension {
    value: Option<Arc<dyn Any + Send + Sync>>,
}

impl SslDigestExtension {
    /// Retrieves a reference to the user-defined TLS data if it matches the specified type.
    ///
    /// Returns `None` if no data has been set or if the data is not of type `T`.
    pub fn get<T>(&self) -> Option<&T>
    where
        T: Send + Sync + 'static,
    {
        self.value.as_ref().and_then(|v| v.downcast_ref::<T>())
    }

    #[allow(dead_code)]
    pub(crate) fn set(&mut self, value: Arc<dyn Any + Send + Sync>) {
        self.value = Some(value);
    }
}


================================================
FILE: pingora-core/src/protocols/tls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The TLS layer implementations

pub mod digest;
pub use digest::*;

#[cfg(feature = "openssl_derived")]
mod boringssl_openssl;

#[cfg(feature = "openssl_derived")]
pub use boringssl_openssl::*;

#[cfg(feature = "rustls")]
mod rustls;

#[cfg(feature = "rustls")]
pub use rustls::*;

#[cfg(feature = "s2n")]
mod s2n;

#[cfg(feature = "s2n")]
pub use s2n::*;

#[cfg(not(feature = "any_tls"))]
pub mod noop_tls;

#[cfg(not(feature = "any_tls"))]
pub use noop_tls::*;

/// Containing type for a user callback to generate extensions for the `SslDigest` upon handshake
/// completion.
pub type HandshakeCompleteHook = std::sync::Arc<
    dyn Fn(&TlsRef) -> Option<std::sync::Arc<dyn std::any::Any + Send + Sync>> + Send + Sync,
>;

/// The protocol for Application-Layer Protocol Negotiation
#[derive(Hash, Clone, Debug, PartialEq, PartialOrd)]
pub enum ALPN {
    /// Prefer HTTP/1.1 only
    H1,
    /// Prefer HTTP/2 only
    H2,
    /// Prefer HTTP/2 over HTTP/1.1
    H2H1,
    /// Custom Protocol is stored in wire format (length-prefixed)
    /// Wire format is precomputed at creation to avoid dangling references
    Custom(CustomALPN),
}

/// Represents a Custom ALPN Protocol with a precomputed wire format and header offset.
#[derive(Hash, Clone, Debug, PartialEq, PartialOrd)]
pub struct CustomALPN {
    wire: Vec<u8>,
    header: usize,
}

impl CustomALPN {
    /// Create a new CustomALPN from a protocol byte vector
    pub fn new(proto: Vec<u8>) -> Self {
        // Validate before setting
        assert!(!proto.is_empty(), "Custom ALPN protocol must not be empty");
        // RFC-7301
        assert!(
            proto.len() <= 255,
            "ALPN protocol name must be 255 bytes or fewer"
        );

        match proto.as_slice() {
            b"http/1.1" | b"h2" => {
                panic!("Custom ALPN cannot be a reserved protocol (http/1.1 or h2)")
            }
            _ => {}
        }
        let mut wire = Vec::with_capacity(1 + proto.len());
        wire.push(proto.len() as u8);
        wire.extend_from_slice(&proto);

        Self {
            wire,
            header: 1, // Header is always at index 1 since we prefix one length byte
        }
    }

    /// Get the custom protocol name as a slice
    pub fn protocol(&self) -> &[u8] {
        &self.wire[self.header..]
    }

    /// Get the wire format used for ALPN negotiation
    pub fn as_wire(&self) -> &[u8] {
        &self.wire
    }
}

impl std::fmt::Display for ALPN {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            ALPN::H1 => write!(f, "H1"),
            ALPN::H2 => write!(f, "H2"),
            ALPN::H2H1 => write!(f, "H2H1"),
            ALPN::Custom(custom) => {
                // extract protocol name, print as UTF-8 if possible, else judt itd raw bytes
                match std::str::from_utf8(custom.protocol()) {
                    Ok(s) => write!(f, "Custom({})", s),
                    Err(_) => write!(f, "Custom({:?})", custom.protocol()),
                }
            }
        }
    }
}

impl ALPN {
    /// Create a new ALPN according to the `max` and `min` version constraints
    pub fn new(max: u8, min: u8) -> Self {
        if max == 1 {
            ALPN::H1
        } else if min == 2 {
            ALPN::H2
        } else {
            ALPN::H2H1
        }
    }

    /// Return the max http version this [`ALPN`] allows
    pub fn get_max_http_version(&self) -> u8 {
        match self {
            ALPN::H1 => 1,
            ALPN::H2 | ALPN::H2H1 => 2,
            ALPN::Custom(_) => 0,
        }
    }

    /// Return the min http version this [`ALPN`] allows
    pub fn get_min_http_version(&self) -> u8 {
        match self {
            ALPN::H1 | ALPN::H2H1 => 1,
            ALPN::H2 => 2,
            ALPN::Custom(_) => 0,
        }
    }

    #[cfg(feature = "openssl_derived")]
    pub(crate) fn to_wire_preference(&self) -> &[u8] {
        // https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set_alpn_select_cb.html
        // "vector of nonempty, 8-bit length-prefixed, byte strings"
        match self {
            Self::H1 => b"\x08http/1.1",
            Self::H2 => b"\x02h2",
            Self::H2H1 => b"\x02h2\x08http/1.1",
            Self::Custom(custom) => custom.as_wire(),
        }
    }

    #[cfg(feature = "any_tls")]
    pub(crate) fn from_wire_selected(raw: &[u8]) -> Option<Self> {
        match raw {
            b"http/1.1" => Some(Self::H1),
            b"h2" => Some(Self::H2),
            _ => Some(Self::Custom(CustomALPN::new(raw.to_vec()))),
        }
    }

    #[cfg(feature = "rustls")]
    pub(crate) fn to_wire_protocols(&self) -> Vec<Vec<u8>> {
        match self {
            ALPN::H1 => vec![b"http/1.1".to_vec()],
            ALPN::H2 => vec![b"h2".to_vec()],
            ALPN::H2H1 => vec![b"h2".to_vec(), b"http/1.1".to_vec()],
            ALPN::Custom(custom) => vec![custom.protocol().to_vec()],
        }
    }

    #[cfg(feature = "s2n")]
    pub(crate) fn to_wire_protocols(&self) -> Vec<Vec<u8>> {
        match self {
            ALPN::H1 => vec![b"http/1.1".to_vec()],
            ALPN::H2 => vec![b"h2".to_vec()],
            ALPN::H2H1 => vec![b"h2".to_vec(), b"http/1.1".to_vec()],
            ALPN::Custom(custom) => vec![custom.protocol().to_vec()],
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_valid_alpn_construction_and_versions() {
        // Standard Protocols
        assert_eq!(ALPN::H1.get_min_http_version(), 1);
        assert_eq!(ALPN::H1.get_max_http_version(), 1);

        assert_eq!(ALPN::H2.get_min_http_version(), 2);
        assert_eq!(ALPN::H2.get_max_http_version(), 2);

        assert_eq!(ALPN::H2H1.get_min_http_version(), 1);
        assert_eq!(ALPN::H2H1.get_max_http_version(), 2);

        // Custom Protocol
        let custom_protocol = ALPN::Custom(CustomALPN::new("custom/1.0".into()));
        assert_eq!(custom_protocol.get_min_http_version(), 0);
        assert_eq!(custom_protocol.get_max_http_version(), 0);
    }
    #[test]
    #[should_panic(expected = "Custom ALPN protocol must not be empty")]
    fn test_empty_custom_alpn() {
        let _ = ALPN::Custom(CustomALPN::new("".into()));
    }
    #[test]
    #[should_panic(expected = "ALPN protocol name must be 255 bytes or fewer")]
    fn test_large_custom_alpn() {
        let large_alpn = vec![b'a'; 256];
        let _ = ALPN::Custom(CustomALPN::new(large_alpn));
    }
    #[test]
    #[should_panic(expected = "Custom ALPN cannot be a reserved protocol (http/1.1 or h2)")]
    fn test_custom_h1_alpn() {
        let _ = ALPN::Custom(CustomALPN::new("http/1.1".into()));
    }
    #[test]
    #[should_panic(expected = "Custom ALPN cannot be a reserved protocol (http/1.1 or h2)")]
    fn test_custom_h2_alpn() {
        let _ = ALPN::Custom(CustomALPN::new("h2".into()));
    }
}


================================================
FILE: pingora-core/src/protocols/tls/noop_tls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This is a set of stubs that provides the minimum types to let pingora work
//! without any tls providers configured

pub struct TlsRef;

pub type CaType = [CertWrapper];

#[derive(Debug)]
pub struct CertWrapper;

impl CertWrapper {
    pub fn not_after(&self) -> &str {
        ""
    }
}

pub mod connectors {
    use pingora_error::Result;

    use crate::{
        connectors::ConnectorOptions,
        protocols::{ALPN, IO},
        upstreams::peer::Peer,
    };

    use super::stream::SslStream;

    #[derive(Clone)]
    pub struct Connector {
        pub ctx: TlsConnector,
    }

    #[derive(Clone)]
    pub struct TlsConnector;

    pub struct TlsSettings;

    impl Connector {
        pub fn new(_: Option<ConnectorOptions>) -> Self {
            Self { ctx: TlsConnector }
        }
    }

    pub async fn connect<T, P>(
        _: T,
        _: &P,
        _: Option<ALPN>,
        _: &TlsConnector,
    ) -> Result<SslStream<T>>
    where
        T: IO,
        P: Peer + Send + Sync,
    {
        Ok(SslStream::default())
    }
}

pub mod listeners {
    use pingora_error::Result;
    use tokio::io::{AsyncRead, AsyncWrite};

    use super::stream::SslStream;

    pub struct Acceptor;

    pub struct TlsSettings;

    impl TlsSettings {
        pub fn build(&self) -> Acceptor {
            Acceptor
        }

        pub fn intermediate(_: &str, _: &str) -> Result<Self> {
            Ok(Self)
        }

        pub fn enable_h2(&mut self) {}
    }

    impl Acceptor {
        pub async fn tls_handshake<S: AsyncRead + AsyncWrite>(&self, _: S) -> Result<SslStream<S>> {
            unimplemented!("No tls feature was specified")
        }
    }
}

pub mod stream {
    use std::{
        pin::Pin,
        task::{Context, Poll},
    };

    use async_trait::async_trait;
    use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};

    use crate::protocols::{
        GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, Ssl, UniqueID,
    };

    /// A TLS session over a stream.
    #[derive(Debug)]
    pub struct SslStream<S> {
        marker: std::marker::PhantomData<S>,
    }

    impl<S> Default for SslStream<S> {
        fn default() -> Self {
            Self {
                marker: Default::default(),
            }
        }
    }

    impl<S> AsyncRead for SslStream<S>
    where
        S: AsyncRead + AsyncWrite,
    {
        fn poll_read(
            self: Pin<&mut Self>,
            _ctx: &mut Context<'_>,
            _buf: &mut ReadBuf<'_>,
        ) -> Poll<std::io::Result<()>> {
            Poll::Ready(Ok(()))
        }
    }

    impl<S> AsyncWrite for SslStream<S>
    where
        S: AsyncRead + AsyncWrite,
    {
        fn poll_write(
            self: Pin<&mut Self>,
            _ctx: &mut Context<'_>,
            buf: &[u8],
        ) -> Poll<std::io::Result<usize>> {
            Poll::Ready(Ok(buf.len()))
        }

        fn poll_flush(self: Pin<&mut Self>, _ctx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
            Poll::Ready(Ok(()))
        }

        fn poll_shutdown(
            self: Pin<&mut Self>,
            _ctx: &mut Context<'_>,
        ) -> Poll<std::io::Result<()>> {
            Poll::Ready(Ok(()))
        }
    }

    #[async_trait]
    impl<S: Send> Shutdown for SslStream<S> {
        async fn shutdown(&mut self) {}
    }

    impl<S> UniqueID for SslStream<S> {
        fn id(&self) -> crate::protocols::UniqueIDType {
            0
        }
    }

    impl<S> Ssl for SslStream<S> {}

    impl<S> GetTimingDigest for SslStream<S> {
        fn get_timing_digest(&self) -> Vec<Option<crate::protocols::TimingDigest>> {
            vec![]
        }
    }

    impl<S> GetProxyDigest for SslStream<S> {
        fn get_proxy_digest(
            &self,
        ) -> Option<std::sync::Arc<crate::protocols::raw_connect::ProxyDigest>> {
            None
        }
    }

    impl<S> GetSocketDigest for SslStream<S> {
        fn get_socket_digest(&self) -> Option<std::sync::Arc<crate::protocols::SocketDigest>> {
            None
        }
    }

    impl<S> Peek for SslStream<S> {}
}

pub mod utils {
    use std::fmt::Display;

    use super::CertWrapper;

    #[derive(Debug, Clone, Hash)]
    pub struct CertKey;

    impl Display for CertKey {
        fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            Ok(())
        }
    }

    pub fn get_organization_unit(_: &CertWrapper) -> Option<String> {
        None
    }
}


================================================
FILE: pingora-core/src/protocols/tls/rustls/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Rustls TLS client specific implementation

use crate::protocols::tls::rustls::TlsStream;
use crate::protocols::IO;
use pingora_error::ErrorType::TLSHandshakeFailure;
use pingora_error::{Error, OrErr, Result};
use pingora_rustls::TlsConnector;

// Perform the TLS handshake for the given connection with the given configuration
pub async fn handshake<S: IO>(
    connector: &TlsConnector,
    domain: &str,
    io: S,
) -> Result<TlsStream<S>> {
    let mut stream = TlsStream::from_connector(connector, domain, io)
        .await
        .or_err(TLSHandshakeFailure, "tls stream error")?;

    let handshake_result = stream.connect().await;
    match handshake_result {
        Ok(()) => Ok(stream),
        Err(e) => {
            let context = format!("TLS connect() failed: {e}, SNI: {domain}");
            Error::e_explain(TLSHandshakeFailure, context)
        }
    }
}


================================================
FILE: pingora-core/src/protocols/tls/rustls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod client;
pub mod server;
mod stream;

pub use stream::*;

use crate::utils::tls::WrappedX509;

pub type CaType = [WrappedX509];

pub struct TlsRef;


================================================
FILE: pingora-core/src/protocols/tls/rustls/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Rustls TLS server specific implementation

use crate::listeners::TlsAcceptCallbacks;
use crate::protocols::tls::rustls::TlsStream;
use crate::protocols::tls::TlsRef;
use crate::protocols::IO;
use crate::{listeners::tls::Acceptor, protocols::Shutdown};
use async_trait::async_trait;
use log::warn;
use pingora_error::{ErrorType::*, OrErr, Result};
use std::pin::Pin;
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};

impl<S: AsyncRead + AsyncWrite + Send + Unpin> TlsStream<S> {
    async fn start_accept(mut self: Pin<&mut Self>) -> Result<bool> {
        // TODO: suspend cert callback
        let res = self.accept().await;

        match res {
            Ok(()) => Ok(true),
            Err(e) => {
                if e.etype == TLSWantX509Lookup {
                    Ok(false)
                } else {
                    Err(e)
                }
            }
        }
    }

    async fn resume_accept(mut self: Pin<&mut Self>) -> Result<()> {
        // TODO: unblock cert callback
        self.accept().await
    }
}

async fn prepare_tls_stream<S: IO>(acceptor: &Acceptor, io: S) -> Result<TlsStream<S>> {
    TlsStream::from_acceptor(acceptor, io)
        .await
        .explain_err(TLSHandshakeFailure, |e| format!("tls stream error: {e}"))
}

/// Perform TLS handshake for the given connection with the given configuration
pub async fn handshake<S: IO>(acceptor: &Acceptor, io: S) -> Result<TlsStream<S>> {
    let mut stream = prepare_tls_stream(acceptor, io).await?;
    stream
        .accept()
        .await
        .explain_err(TLSHandshakeFailure, |e| format!("TLS accept() failed: {e}"))?;
    Ok(stream)
}

/// Perform TLS handshake for the given connection with the given configuration and callbacks
/// callbacks are currently not supported within pingora Rustls and are ignored
pub async fn handshake_with_callback<S: IO>(
    acceptor: &Acceptor,
    io: S,
    callbacks: &TlsAcceptCallbacks,
) -> Result<TlsStream<S>> {
    let mut tls_stream = prepare_tls_stream(acceptor, io).await?;
    let done = Pin::new(&mut tls_stream).start_accept().await?;
    if !done {
        // TODO: verify if/how callback in handshake can be done using Rustls
        warn!("Callacks are not supported with feature \"rustls\".");

        Pin::new(&mut tls_stream)
            .resume_accept()
            .await
            .explain_err(TLSHandshakeFailure, |e| format!("TLS accept() failed: {e}"))?;
    }
    {
        let tls_ref = TlsRef;
        if let Some(extension) = callbacks.handshake_complete_callback(&tls_ref).await {
            if let Some(digest_mut) = tls_stream.ssl_digest_mut() {
                digest_mut.extension.set(extension);
            }
        }
    }
    Ok(tls_stream)
}

#[async_trait]
impl<S> Shutdown for TlsStream<S>
where
    S: AsyncRead + AsyncWrite + Sync + Unpin + Send,
{
    async fn shutdown(&mut self) {
        match <Self as AsyncWriteExt>::shutdown(self).await {
            Ok(()) => {}
            Err(e) => {
                warn!("TLS shutdown failed, {e}");
            }
        }
    }
}

#[ignore]
#[tokio::test]
async fn test_async_cert() {
    todo!("callback support and test for Rustls")
}


================================================
FILE: pingora-core/src/protocols/tls/rustls/stream.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::io::Result as IoResult;
use std::ops::{Deref, DerefMut};
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::{Duration, SystemTime};

use crate::listeners::tls::Acceptor;
use crate::protocols::raw_connect::ProxyDigest;
use crate::protocols::{tls::SslDigest, Peek, TimingDigest, UniqueIDType};
use crate::protocols::{
    GetProxyDigest, GetSocketDigest, GetTimingDigest, SocketDigest, Ssl, UniqueID, ALPN,
};
use crate::utils::tls::get_organization_serial_bytes;
use pingora_error::ErrorType::{AcceptError, ConnectError, InternalError, TLSHandshakeFailure};
use pingora_error::{OkOrErr, OrErr, Result};
use pingora_rustls::TlsStream as RusTlsStream;
use pingora_rustls::{hash_certificate, NoDebug};
use pingora_rustls::{Accept, Connect, ServerName, TlsConnector};
use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};
use x509_parser::nom::AsBytes;

#[derive(Debug)]
pub struct InnerStream<T> {
    pub(crate) stream: Option<RusTlsStream<T>>,
    connect: NoDebug<Option<Connect<T>>>,
    accept: NoDebug<Option<Accept<T>>>,
}

/// The TLS connection
#[derive(Debug)]
pub struct TlsStream<T> {
    tls: InnerStream<T>,
    digest: Option<Arc<SslDigest>>,
    timing: TimingDigest,
}

impl<T> TlsStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin + Send,
{
    /// Create a new TLS connection from the given `stream`
    ///
    /// Using RustTLS the stream is only returned after the handshake.
    /// The caller does therefor not need to perform [`Self::connect()`].
    pub async fn from_connector(connector: &TlsConnector, domain: &str, stream: T) -> Result<Self> {
        let server = ServerName::try_from(domain).or_err_with(InternalError, || {
            format!("Invalid Input: Failed to parse domain: {domain}")
        })?;

        let tls = InnerStream::from_connector(connector, server, stream)
            .await
            .explain_err(TLSHandshakeFailure, |e| format!("tls stream error: {e}"))?;

        Ok(TlsStream {
            tls,
            digest: None,
            timing: Default::default(),
        })
    }

    /// Create a new TLS connection from the given `stream`
    ///
    /// Using RustTLS the stream is only returned after the handshake.
    /// The caller does therefor not need to perform [`Self::accept()`].
    pub(crate) async fn from_acceptor(acceptor: &Acceptor, stream: T) -> Result<Self> {
        let tls = InnerStream::from_acceptor(acceptor, stream)
            .await
            .explain_err(TLSHandshakeFailure, |e| format!("tls stream error: {e}"))?;

        Ok(TlsStream {
            tls,
            digest: None,
            timing: Default::default(),
        })
    }
}

impl<S> GetSocketDigest for TlsStream<S>
where
    S: GetSocketDigest,
{
    fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
        self.tls.get_socket_digest()
    }
    fn set_socket_digest(&mut self, socket_digest: SocketDigest) {
        self.tls.set_socket_digest(socket_digest)
    }
}

impl<S> GetTimingDigest for TlsStream<S>
where
    S: GetTimingDigest,
{
    fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
        let mut ts_vec = self.tls.get_timing_digest();
        ts_vec.push(Some(self.timing.clone()));
        ts_vec
    }
    fn get_read_pending_time(&self) -> Duration {
        self.tls.get_read_pending_time()
    }

    fn get_write_pending_time(&self) -> Duration {
        self.tls.get_write_pending_time()
    }
}

impl<S> GetProxyDigest for TlsStream<S>
where
    S: GetProxyDigest,
{
    fn get_proxy_digest(&self) -> Option<Arc<ProxyDigest>> {
        self.tls.get_proxy_digest()
    }
}

impl<T> TlsStream<T> {
    pub fn ssl_digest(&self) -> Option<Arc<SslDigest>> {
        self.digest.clone()
    }

    /// Attempts to obtain a mutable reference to the SslDigest.
    /// This method returns `None` if the SslDigest is currently held by other references.
    pub(crate) fn ssl_digest_mut(&mut self) -> Option<&mut SslDigest> {
        Arc::get_mut(self.digest.as_mut()?)
    }
}

impl<T> Deref for TlsStream<T> {
    type Target = InnerStream<T>;

    fn deref(&self) -> &Self::Target {
        &self.tls
    }
}

impl<T> DerefMut for TlsStream<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.tls
    }
}

impl<T> TlsStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin + Send,
{
    /// Connect to the remote TLS server as a client
    pub(crate) async fn connect(&mut self) -> Result<()> {
        self.tls.connect().await?;
        self.timing.established_ts = SystemTime::now();
        self.digest = self.tls.digest();
        Ok(())
    }

    /// Finish the TLS handshake from client as a server
    pub(crate) async fn accept(&mut self) -> Result<()> {
        self.tls.accept().await?;
        self.timing.established_ts = SystemTime::now();
        self.digest = self.tls.digest();
        Ok(())
    }
}

impl<T> AsyncRead for TlsStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<IoResult<()>> {
        Pin::new(&mut self.tls.stream.as_mut().unwrap()).poll_read(cx, buf)
    }
}

impl<T> AsyncWrite for TlsStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<IoResult<usize>> {
        Pin::new(&mut self.tls.stream.as_mut().unwrap()).poll_write(cx, buf)
    }

    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<IoResult<()>> {
        Pin::new(&mut self.tls.stream.as_mut().unwrap()).poll_flush(cx)
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<IoResult<()>> {
        Pin::new(&mut self.tls.stream.as_mut().unwrap()).poll_shutdown(cx)
    }

    fn poll_write_vectored(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> Poll<IoResult<usize>> {
        Pin::new(&mut self.tls.stream.as_mut().unwrap()).poll_write_vectored(cx, bufs)
    }

    fn is_write_vectored(&self) -> bool {
        true
    }
}

impl<T> UniqueID for TlsStream<T>
where
    T: UniqueID,
{
    fn id(&self) -> UniqueIDType {
        self.tls.stream.as_ref().unwrap().get_ref().0.id()
    }
}

impl<T> Ssl for TlsStream<T> {
    fn get_ssl_digest(&self) -> Option<Arc<SslDigest>> {
        self.ssl_digest()
    }

    fn selected_alpn_proto(&self) -> Option<ALPN> {
        let st = self.tls.stream.as_ref();
        if let Some(stream) = st {
            let proto = stream.get_ref().1.alpn_protocol();
            match proto {
                None => None,
                Some(raw) => ALPN::from_wire_selected(raw),
            }
        } else {
            None
        }
    }
}

/// Create a new TLS connection from the given `stream`
///
/// The caller needs to perform [`Self::connect()`] or [`Self::accept()`] to perform TLS
/// handshake after.
impl<T: AsyncRead + AsyncWrite + Unpin> InnerStream<T> {
    pub(crate) async fn from_connector(
        connector: &TlsConnector,
        server: ServerName<'_>,
        stream: T,
    ) -> Result<Self> {
        let connect = connector.connect(server.to_owned(), stream);
        Ok(InnerStream {
            accept: None.into(),
            connect: Some(connect).into(),
            stream: None,
        })
    }

    pub(crate) async fn from_acceptor(acceptor: &Acceptor, stream: T) -> Result<Self> {
        let accept = acceptor.acceptor.accept(stream);

        Ok(InnerStream {
            accept: Some(accept).into(),
            connect: None.into(),
            stream: None,
        })
    }
}

impl<T: AsyncRead + AsyncWrite + Unpin + Send> InnerStream<T> {
    /// Connect to the remote TLS server as a client
    pub(crate) async fn connect(&mut self) -> Result<()> {
        let connect = &mut (*self.connect);
        let connect = connect.take().or_err(
            ConnectError,
            "TLS connect not available to perform handshake.",
        )?;

        let stream = connect
            .await
            .or_err(TLSHandshakeFailure, "tls connect error")?;
        self.stream = Some(RusTlsStream::Client(stream));
        Ok(())
    }

    /// Finish the TLS handshake from client as a server
    /// no-op implementation within Rustls, handshake is performed during creation of stream.
    pub(crate) async fn accept(&mut self) -> Result<()> {
        let accept = &mut (*self.accept);
        let accept = accept.take().or_err(
            AcceptError,
            "TLS accept not available to perform handshake.",
        )?;

        let stream = accept
            .await
            .explain_err(TLSHandshakeFailure, |e| format!("tls connect error: {e}"))?;
        self.stream = Some(RusTlsStream::Server(stream));
        Ok(())
    }

    pub(crate) fn digest(&mut self) -> Option<Arc<SslDigest>> {
        Some(Arc::new(SslDigest::from_stream(&self.stream)))
    }
}

impl<S> GetSocketDigest for InnerStream<S>
where
    S: GetSocketDigest,
{
    fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
        if let Some(stream) = self.stream.as_ref() {
            stream.get_ref().0.get_socket_digest()
        } else {
            None
        }
    }
    fn set_socket_digest(&mut self, socket_digest: SocketDigest) {
        self.stream
            .as_mut()
            .unwrap()
            .get_mut()
            .0
            .set_socket_digest(socket_digest)
    }
}

impl<S> GetTimingDigest for InnerStream<S>
where
    S: GetTimingDigest,
{
    fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
        self.stream
            .as_ref()
            .unwrap()
            .get_ref()
            .0
            .get_timing_digest()
    }
}

impl<S> GetProxyDigest for InnerStream<S>
where
    S: GetProxyDigest,
{
    fn get_proxy_digest(&self) -> Option<Arc<ProxyDigest>> {
        if let Some(stream) = self.stream.as_ref() {
            stream.get_ref().0.get_proxy_digest()
        } else {
            None
        }
    }
}

impl SslDigest {
    fn from_stream<T>(stream: &Option<RusTlsStream<T>>) -> Self {
        let stream = stream.as_ref().unwrap();
        let (_io, session) = stream.get_ref();
        let protocol = session.protocol_version();
        let cipher_suite = session.negotiated_cipher_suite();
        let peer_certificates = session.peer_certificates();

        let cipher = cipher_suite
            .and_then(|suite| suite.suite().as_str())
            .unwrap_or_default();

        let version = protocol
            .and_then(|proto| proto.as_str())
            .unwrap_or_default();

        let cert_digest = peer_certificates
            .and_then(|certs| certs.first())
            .map(|cert| hash_certificate(cert))
            .unwrap_or_default();

        let (organization, serial_number) = peer_certificates
            .and_then(|certs| certs.first())
            .map(|cert| get_organization_serial_bytes(cert.as_bytes()))
            .transpose()
            .ok()
            .flatten()
            .map(|(organization, serial)| (organization, Some(serial)))
            .unwrap_or_default();

        SslDigest::new(cipher, version, organization, serial_number, cert_digest)
    }
}

impl<S> Peek for TlsStream<S> {}


================================================
FILE: pingora-core/src/protocols/tls/s2n/client.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! S2N client specific implementation

use crate::protocols::tls::{AutoFlushableStream, S2NConnectionBuilder, TlsStream};
use crate::protocols::IO;
use pingora_error::ErrorType::TLSHandshakeFailure;
use pingora_error::{Error, Result};
use pingora_s2n::TlsConnector;

// Perform the TLS handshake for the given connection with the given configuration
pub async fn handshake<S: IO>(
    connector: &TlsConnector<S2NConnectionBuilder>,
    domain: &str,
    stream: S,
) -> Result<TlsStream<S>> {
    // Wrap incoming stream in an auto flushable stream with auto flush enabled because
    // s2n-tls doesn't invoke flush after writing to the connection. This would result in
    // the handshake hanging and timing on streams with write buffering.
    let auto_flushable_stream = AutoFlushableStream::new(stream, true);
    let mut s2n_stream = connector
        .connect(domain, auto_flushable_stream)
        .await
        .map_err(|e| {
            let context = format!("TLS connect() failed: {e}, SNI: {domain}");
            Error::explain(TLSHandshakeFailure, context)
        })?;

    // Disable auto-flush to not interfere with write buffering going forward.
    s2n_stream.get_mut().set_auto_flush(false);
    Ok(TlsStream::from_s2n_stream(s2n_stream))
}


================================================
FILE: pingora-core/src/protocols/tls/s2n/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

pub mod client;
pub mod server;
mod stream;

use std::{
    hash::{Hash, Hasher},
    sync::Arc,
};

use pingora_s2n::{
    Config, Connection, ConnectionBuilder, Mode, Psk as S2NPsk, PskHmac, S2NError, S2NPolicy,
};
pub use stream::*;

use crate::utils::tls::X509Pem;

pub type CaType = X509Pem;

pub type PskType = PskConfig;

#[derive(Debug)]
pub struct PskConfig {
    pub keys: Vec<Psk>,
}

impl PskConfig {
    pub fn new(keys: Vec<Psk>) -> Self {
        Self { keys }
    }
}

impl Hash for PskConfig {
    fn hash<H: Hasher>(&self, state: &mut H) {
        for psk in self.keys.iter() {
            psk.identity.hash(state);
            psk.secret.hash(state);
        }
    }
}

#[derive(Debug)]
pub struct Psk {
    pub identity: Vec<u8>,
    pub secret: Vec<u8>,
    pub hmac: PskHmac,
}

impl Psk {
    pub fn new(identity: String, secret: Vec<u8>, hmac: PskHmac) -> Self {
        Self {
            identity: identity.into_bytes(),
            secret,
            hmac,
        }
    }
}

pub struct TlsRef;

/// Custom s2n-tls connection builder. The s2n-tls-tokio crate doesn't expose
/// a higher level api to configure private shared keys on a TLS connection.
///
/// This builder will create a new connection and configure it with the appropriate
/// psk configurations based on the provided private shared keys.
/// ```
#[derive(Debug, Clone)]
pub struct S2NConnectionBuilder {
    pub config: Config,
    pub psk_config: Option<Arc<PskConfig>>,
    pub security_policy: Option<S2NPolicy>,
}

impl ConnectionBuilder for S2NConnectionBuilder {
    type Output = Connection;
    fn build_connection(&self, mode: Mode) -> std::result::Result<Self::Output, S2NError> {
        let mut conn = Connection::new(mode);
        conn.set_config(self.config.clone())?;

        if let Some(psk_config) = &self.psk_config {
            for psk in psk_config.keys.iter() {
                let mut psk_builder = S2NPsk::builder()?;
                psk_builder.set_identity(&psk.identity)?;
                psk_builder.set_hmac(PskHmac::SHA256)?;
                psk_builder.set_secret(&psk.secret)?;
                conn.append_psk(&psk_builder.build()?)?;
            }
        }

        if let Some(policy) = &self.security_policy {
            conn.set_security_policy(policy)?;
        }

        Ok(conn)
    }
}


================================================
FILE: pingora-core/src/protocols/tls/s2n/server.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! S2N server specific implementation

use crate::listeners::tls::Acceptor;
use crate::protocols::tls::{AutoFlushableStream, TlsStream};
use crate::protocols::IO;
use pingora_error::ErrorType::TLSHandshakeFailure;
use pingora_error::{Error, Result};

pub async fn handshake<S: IO>(acceptor: &Acceptor, stream: S) -> Result<TlsStream<S>> {
    // Wrap incoming stream in an auto flushable stream with auto flush enabled because
    // s2n-tls doesn't invoke flush after writing to the connection. This would result in
    // the handshake hanging and timing on streams with write buffering.
    let auto_flushable_stream = AutoFlushableStream::new(stream, true);
    let mut s2n_stream = acceptor
        .acceptor
        .accept(auto_flushable_stream)
        .await
        .map_err(|e| {
            let context = format!("TLS accept() failed: {e}");
            Error::explain(TLSHandshakeFailure, context)
        })?;

    // Disable auto-flush to not interfere with write buffering going forward.
    s2n_stream.get_mut().set_auto_flush(false);

    Ok(TlsStream::from_s2n_stream(s2n_stream))
}


================================================
FILE: pingora-core/src/protocols/tls/s2n/stream.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::protocols::digest::TimingDigest;
use crate::protocols::raw_connect::ProxyDigest;
use crate::protocols::tls::SslDigest;
use crate::protocols::{
    GetProxyDigest, GetSocketDigest, GetTimingDigest, Peek, Shutdown, SocketDigest, Ssl, UniqueID,
    UniqueIDType, ALPN,
};
use crate::tls::TlsStream as S2NTlsStream;
use crate::utils::tls::get_organization_serial_bytes;
use async_trait::async_trait;
use log::debug;
use pingora_s2n::hash_certificate;
use std::fmt::Debug;
use std::io::Result as IoResult;
use std::ops::{Deref, DerefMut};
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use std::time::{Duration, SystemTime};
use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};

/// Stream wrapper that will automatically flush all writes depending on the value of
/// `auto_flush`. That is, it will always call `poll_flush` on every invocation of
/// `poll_write` or `poll_write_vectored`.
///
/// The underlying transport stream implementation (pingora_core::protocols::l4::stream::Stream)
/// used by Pingora buffers writes to the TCP connection. During the handshake process
/// s2n-tls does not flush writes to the TCP connection, which can lead to scenarios
/// where writes are never sent over the connection causing the handshake process to hang
/// and timeout. This wrapper ensures that all writes are flushed to the TCP connection
/// during the handshake process.
pub struct AutoFlushableStream<T: AsyncRead + AsyncWrite + Unpin> {
    stream: T,
    auto_flush: bool,
}

impl<T> AutoFlushableStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    pub fn new(stream: T, auto_flush: bool) -> Self {
        AutoFlushableStream { stream, auto_flush }
    }

    pub fn set_auto_flush(&mut self, auto_flush: bool) {
        self.auto_flush = auto_flush;
    }
}

impl<T> AsyncRead for AutoFlushableStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<IoResult<()>> {
        Pin::new(&mut self.stream).poll_read(cx, buf)
    }
}

impl<T> AsyncWrite for AutoFlushableStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<IoResult<usize>> {
        let write = Pin::new(&mut self.stream).poll_write(cx, buf);
        if self.auto_flush {
            let _ = Pin::new(&mut self.stream).poll_flush(cx);
        }
        write
    }

    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<IoResult<()>> {
        Pin::new(&mut self.stream).poll_flush(cx)
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<IoResult<()>> {
        Pin::new(&mut self.stream).poll_shutdown(cx)
    }

    fn poll_write_vectored(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> Poll<IoResult<usize>> {
        let write = Pin::new(&mut self.stream).poll_write_vectored(cx, bufs);
        if self.auto_flush {
            let _ = Pin::new(&mut self.stream).poll_flush(cx);
        }
        write
    }

    fn is_write_vectored(&self) -> bool {
        true
    }
}

#[derive(Debug)]
pub struct TlsStream<T: AsyncRead + AsyncWrite + Unpin> {
    stream: S2NTlsStream<AutoFlushableStream<T>>,
    digest: Option<Arc<SslDigest>>,
    pub(super) timing: TimingDigest,
}

impl<T> TlsStream<T>
where
    T: AsyncRead + AsyncWrite + std::marker::Unpin,
{
    pub fn from_s2n_stream(stream: S2NTlsStream<AutoFlushableStream<T>>) -> TlsStream<T> {
        let mut timing: TimingDigest = Default::default();
        timing.established_ts = SystemTime::now();
        let digest = Some(Arc::new(SslDigest::from_stream(Some(&stream))));
        TlsStream {
            stream,
            digest,
            timing,
        }
    }
}

impl<T: AsyncRead + AsyncWrite + std::marker::Unpin> Deref for AutoFlushableStream<T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        &self.stream
    }
}

impl<T: AsyncRead + AsyncWrite + std::marker::Unpin> DerefMut for AutoFlushableStream<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.stream
    }
}

impl<T: AsyncRead + AsyncWrite + std::marker::Unpin> Deref for TlsStream<T> {
    type Target = S2NTlsStream<AutoFlushableStream<T>>;

    fn deref(&self) -> &Self::Target {
        &self.stream
    }
}

impl<T: AsyncRead + AsyncWrite + std::marker::Unpin> DerefMut for TlsStream<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.stream
    }
}

impl<T: AsyncRead + AsyncWrite + std::marker::Unpin> Ssl for TlsStream<T> {
    fn get_ssl_digest(&self) -> Option<Arc<SslDigest>> {
        self.ssl_digest()
    }

    fn selected_alpn_proto(&self) -> Option<ALPN> {
        let stream = self.stream.as_ref();
        let proto = stream.application_protocol();

        match proto {
            None => None,
            Some(raw) => ALPN::from_wire_selected(raw),
        }
    }
}

impl<T> TlsStream<T>
where
    T: AsyncRead + AsyncWrite + std::marker::Unpin,
{
    pub fn ssl_digest(&self) -> Option<Arc<SslDigest>> {
        self.digest.clone()
    }
}

impl<T> AsyncRead for TlsStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut ReadBuf<'_>,
    ) -> Poll<IoResult<()>> {
        debug!("poll_read");
        Pin::new(&mut self.stream).poll_read(cx, buf)
    }
}

impl<T> AsyncWrite for TlsStream<T>
where
    T: AsyncRead + AsyncWrite + Unpin,
{
    fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<IoResult<usize>> {
        Pin::new(&mut self.stream).poll_write(cx, buf)
    }

    fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<IoResult<()>> {
        Pin::new(&mut self.stream).poll_flush(cx)
    }

    fn poll_shutdown(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<IoResult<()>> {
        Pin::new(&mut self.stream).poll_shutdown(cx)
    }

    fn poll_write_vectored(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> Poll<IoResult<usize>> {
        Pin::new(&mut self.stream).poll_write_vectored(cx, bufs)
    }

    fn is_write_vectored(&self) -> bool {
        true
    }
}

impl<T> UniqueID for TlsStream<T>
where
    T: UniqueID + AsyncRead + AsyncWrite + Unpin,
{
    fn id(&self) -> UniqueIDType {
        self.stream.get_ref().id()
    }
}

impl<S> GetSocketDigest for TlsStream<S>
where
    S: GetSocketDigest + AsyncRead + AsyncWrite + std::marker::Unpin,
{
    fn get_socket_digest(&self) -> Option<Arc<SocketDigest>> {
        self.stream.get_ref().get_socket_digest()
    }
    fn set_socket_digest(&mut self, socket_digest: SocketDigest) {
        self.stream.get_mut().set_socket_digest(socket_digest)
    }
}

impl<S> GetTimingDigest for TlsStream<S>
where
    S: GetTimingDigest + AsyncRead + AsyncWrite + std::marker::Unpin,
{
    fn get_timing_digest(&self) -> Vec<Option<TimingDigest>> {
        let mut ts_vec = self.stream.get_ref().get_timing_digest();
        ts_vec.push(Some(self.timing.clone()));
        ts_vec
    }

    fn get_read_pending_time(&self) -> Duration {
        self.stream.get_ref().get_read_pending_time()
    }

    fn get_write_pending_time(&self) -> Duration {
        self.stream.get_ref().get_write_pending_time()
    }
}

impl<S> GetProxyDigest for TlsStream<S>
where
    S: GetProxyDigest + AsyncRead + AsyncWrite + std::marker::Unpin,
{
    fn get_proxy_digest(&self) -> Option<Arc<ProxyDigest>> {
        self.stream.get_ref().get_proxy_digest()
    }
}

impl SslDigest {
    fn from_stream<T: AsyncRead + AsyncWrite + Unpin>(stream: Option<&S2NTlsStream<T>>) -> Self {
        let conn = stream.unwrap().as_ref();

        let cipher = conn.cipher_suite().unwrap_or_default().to_string();
        let version = conn
            .actual_protocol_version()
            .map(|v| format!("{:?}", v))
            .unwrap_or_default()
            .to_string();

        let mut organization = None;
        let mut serial_number = None;
        let mut cert_digest = None;

        if let Ok(cert_chain) = conn.peer_cert_chain() {
            if let Some(Ok(cert)) = cert_chain.iter().next() {
                if let Ok(raw_cert) = cert.der() {
                    if let Ok((org, serial)) = get_organization_serial_bytes(raw_cert) {
                        organization = org;
                        serial_number = Some(serial);
                    }
                    cert_digest = Some(hash_certificate(raw_cert));
                }
            }
        }

        SslDigest::new(
            cipher,
            version,
            organization,
            serial_number,
            cert_digest.unwrap_or_default(),
        )
    }
}

impl<S: AsyncRead + AsyncWrite + std::marker::Unpin> Peek for TlsStream<S> {}

#[async_trait]
impl<S: Shutdown + AsyncRead + AsyncWrite + std::marker::Unpin + Send> Shutdown for TlsStream<S> {
    async fn shutdown(&mut self) -> () {
        self.get_mut().shutdown().await
    }
}


================================================
FILE: pingora-core/src/protocols/windows.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Windows specific functionality for calling the WinSock c api
//!
//! Implementations here are based on the implementation in the std library
//! https://github.com/rust-lang/rust/blob/84ac80f/library/std/src/sys_common/net.rs
//! https://github.com/rust-lang/rust/blob/84ac80f/library/std/src/sys/pal/windows/net.rs

use std::os::windows::io::RawSocket;
use std::{io, mem, net::SocketAddr};

use windows_sys::Win32::Networking::WinSock::{
    getpeername, getsockname, AF_INET, AF_INET6, SOCKADDR_IN, SOCKADDR_IN6, SOCKADDR_STORAGE,
    SOCKET,
};

pub(crate) fn peer_addr(raw_sock: RawSocket) -> io::Result<SocketAddr> {
    let mut storage = unsafe { mem::zeroed::<SOCKADDR_STORAGE>() };
    let mut addrlen = mem::size_of_val(&storage) as i32;

    unsafe {
        let res = getpeername(
            raw_sock as SOCKET,
            core::ptr::addr_of_mut!(storage) as *mut _,
            &mut addrlen,
        );
        if res != 0 {
            return Err(io::Error::last_os_error());
        }
    }

    sockaddr_to_addr(&storage, addrlen as usize)
}
pub(crate) fn local_addr(raw_sock: RawSocket) -> io::Result<SocketAddr> {
    let mut storage = unsafe { mem::zeroed::<SOCKADDR_STORAGE>() };
    let mut addrlen = mem::size_of_val(&storage) as i32;

    unsafe {
        let res = getsockname(
            raw_sock as libc::SOCKET,
            core::ptr::addr_of_mut!(storage) as *mut _,
            &mut addrlen,
        );
        if res != 0 {
            return Err(io::Error::last_os_error());
        }
    }

    sockaddr_to_addr(&storage, addrlen as usize)
}

fn sockaddr_to_addr(storage: &SOCKADDR_STORAGE, len: usize) -> io::Result<SocketAddr> {
    match storage.ss_family {
        AF_INET => {
            assert!(len >= mem::size_of::<SOCKADDR_IN>());
            Ok(SocketAddr::from(unsafe {
                let sockaddr = *(storage as *const _ as *const SOCKADDR_IN);
                (
                    sockaddr.sin_addr.S_un.S_addr.to_ne_bytes(),
                    sockaddr.sin_port.to_be(),
                )
            }))
        }
        AF_INET6 => {
            assert!(len >= mem::size_of::<SOCKADDR_IN6>());
            Ok(SocketAddr::from(unsafe {
                let sockaddr = *(storage as *const _ as *const SOCKADDR_IN6);
                (sockaddr.sin6_addr.u.Byte, sockaddr.sin6_port.to_be())
            }))
        }
        _ => Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "invalid argument",
        )),
    }
}

#[cfg(test)]
mod tests {
    use std::os::windows::io::AsRawSocket;

    use crate::protocols::l4::{listener::Listener, stream::Stream};

    use super::*;

    async fn assert_listener_and_stream(addr: &str) {
        let tokio_listener = tokio::net::TcpListener::bind(addr).await.unwrap();

        let listener_local_addr = tokio_listener.local_addr().unwrap();

        let tokio_stream = tokio::net::TcpStream::connect(listener_local_addr)
            .await
            .unwrap();

        let stream_local_addr = tokio_stream.local_addr().unwrap();
        let stream_peer_addr = tokio_stream.peer_addr().unwrap();

        let stream: Stream = tokio_stream.into();
        let listener: Listener = tokio_listener.into();

        let raw_sock = listener.as_raw_socket();
        assert_eq!(listener_local_addr, local_addr(raw_sock).unwrap());

        let raw_sock = stream.as_raw_socket();
        assert_eq!(stream_peer_addr, peer_addr(raw_sock).unwrap());
        assert_eq!(stream_local_addr, local_addr(raw_sock).unwrap());
    }

    #[tokio::test]
    async fn get_v4_addrs_from_raw_socket() {
        assert_listener_and_stream("127.0.0.1:0").await
    }
    #[tokio::test]
    async fn get_v6_addrs_from_raw_socket() {
        assert_listener_and_stream("[::1]:0").await
    }
}


================================================
FILE: pingora-core/src/server/bootstrap_services.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(unix)]
pub use super::transfer_fd::Fds;
use async_trait::async_trait;
use log::{debug, error, info};
use parking_lot::Mutex;
use std::sync::Arc;
use tokio::sync::{broadcast, Mutex as TokioMutex};

#[cfg(feature = "sentry")]
use sentry::ClientOptions;

#[cfg(unix)]
use crate::server::ListenFds;

use crate::{
    prelude::Opt,
    server::{configuration::ServerConf, ExecutionPhase, ShutdownWatch},
    services::{background::BackgroundService, ServiceReadyNotifier},
};

/// Service that allows the bootstrap process to be delayed until after
/// dependencies are ready
pub struct BootstrapService {
    inner: Arc<Mutex<Bootstrap>>,
}

/// Sentry is typically started as part of the bootstrap process, but if the
/// bootstrap service is used, we want to initialize Sentry before anything else
/// to make sure errors are captured.
pub struct SentryInitService {
    inner: Arc<Mutex<Bootstrap>>,
}

impl BootstrapService {
    pub fn new(inner: &Arc<Mutex<Bootstrap>>) -> Self {
        BootstrapService {
            inner: Arc::clone(inner),
        }
    }
}

impl SentryInitService {
    pub fn new(inner: &Arc<Mutex<Bootstrap>>) -> Self {
        SentryInitService {
            inner: Arc::clone(inner),
        }
    }
}

/// Encapsulation of the data needed to bootstrap the server
pub struct Bootstrap {
    completed: bool,

    test: bool,
    upgrade: bool,

    upgrade_sock: String,

    execution_phase_watch: broadcast::Sender<ExecutionPhase>,

    #[cfg(unix)]
    listen_fds: Option<ListenFds>,

    #[cfg(feature = "sentry")]
    #[cfg_attr(docsrs, doc(cfg(feature = "sentry")))]
    /// The Sentry ClientOptions.
    ///
    /// Panics and other events sentry captures will be sent to this DSN **only
    /// in release mode**
    pub sentry: Option<ClientOptions>,
}

impl Bootstrap {
    pub fn new(
        options: &Option<Opt>,
        conf: &ServerConf,
        execution_phase_watch: &broadcast::Sender<ExecutionPhase>,
    ) -> Self {
        let (test, upgrade) = options
            .as_ref()
            .map(|opt| (opt.test, opt.upgrade))
            .unwrap_or_default();

        let upgrade_sock = conf.upgrade_sock.clone();

        Bootstrap {
            test,
            upgrade,
            upgrade_sock,
            #[cfg(unix)]
            listen_fds: None,
            execution_phase_watch: execution_phase_watch.clone(),
            completed: false,
            #[cfg(feature = "sentry")]
            sentry: None,
        }
    }

    #[cfg(feature = "sentry")]
    pub fn set_sentry_config(&mut self, sentry_config: Option<ClientOptions>) {
        self.sentry = sentry_config;
    }

    /// Start sentry based on the configured options. To prevent multiple
    /// initializations, this function will consume the sentry configuration
    /// stored in the bootstrap
    fn start_sentry(&mut self) {
        // Only init sentry in release builds
        #[cfg(all(not(debug_assertions), feature = "sentry"))]
        let _guard = self.sentry.take().map(|opts| sentry::init(opts));
    }

    pub fn bootstrap(&mut self) {
        // already bootstrapped
        if self.completed {
            return;
        }

        info!("Bootstrap starting");

        self.execution_phase_watch
            .send(ExecutionPhase::Bootstrap)
            .ok();

        self.start_sentry();

        if self.test {
            info!("Server Test passed, exiting");
            std::process::exit(0);
        }

        // load fds
        #[cfg(unix)]
        match self.load_fds(self.upgrade) {
            Ok(_) => {
                info!("Bootstrap done");
            }
            Err(e) => {
                // sentry log error on fd load failure
                #[cfg(all(not(debug_assertions), feature = "sentry"))]
                sentry::capture_error(&e);

                error!("Bootstrap failed on error: {:?}, exiting.", e);
                std::process::exit(1);
            }
        }

        self.completed = true;

        self.execution_phase_watch
            .send(ExecutionPhase::BootstrapComplete)
            .ok();
    }

    #[cfg(unix)]
    fn load_fds(&mut self, upgrade: bool) -> Result<(), nix::Error> {
        let mut fds = Fds::new();
        if upgrade {
            debug!("Trying to receive socks");
            fds.get_from_sock(self.upgrade_sock.as_str())?
        }
        self.listen_fds = Some(Arc::new(TokioMutex::new(fds)));
        Ok(())
    }

    #[cfg(unix)]
    pub fn get_fds(&self) -> Option<ListenFds> {
        self.listen_fds.clone()
    }
}

#[async_trait]
impl BackgroundService for BootstrapService {
    async fn start_with_ready_notifier(
        &self,
        _shutdown: ShutdownWatch,
        notifier: ServiceReadyNotifier,
    ) {
        self.inner.lock().bootstrap();
        notifier.notify_ready();
    }
}

#[async_trait]
impl BackgroundService for SentryInitService {
    async fn start_with_ready_notifier(
        &self,
        _shutdown: ShutdownWatch,
        notifier: ServiceReadyNotifier,
    ) {
        self.inner.lock().start_sentry();
        notifier.notify_ready();
    }
}


================================================
FILE: pingora-core/src/server/configuration/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Server configurations
//!
//! Server configurations define startup settings such as:
//! * User and group to run as after daemonization
//! * Number of threads per service
//! * Error log file path

use clap::Parser;
use log::{debug, trace};
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use serde::{Deserialize, Serialize};
use std::ffi::OsString;
use std::fs;

// default maximum upstream retries for retry-able proxy errors
const DEFAULT_MAX_RETRIES: usize = 16;

/// The configuration file
///
/// Pingora configuration files are by default YAML files, but any key value format can potentially
/// be used.
///
/// # Extension
/// New keys can be added to the configuration files which this configuration object will ignore.
/// Then, users can parse these key-values to pass to their code to use.
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(default)]
pub struct ServerConf {
    /// Version
    pub version: usize,
    /// Whether to run this process in the background.
    pub daemon: bool,
    /// When configured and `daemon` setting is `true`, error log will be written to the given
    /// file. Otherwise StdErr will be used.
    pub error_log: Option<String>,
    /// The pid (process ID) file of this server to be created when running in background
    pub pid_file: String,
    /// the path to the upgrade socket
    ///
    /// In order to perform zero downtime restart, both the new and old process need to agree on the
    /// path to this sock in order to coordinate the upgrade.
    pub upgrade_sock: String,
    /// If configured, after daemonization, this process will switch to the given user before
    /// starting to serve traffic.
    pub user: Option<String>,
    /// Similar to `user`, the group this process should switch to.
    pub group: Option<String>,
    /// How many threads **each** service should get. The threads are not shared across services.
    pub threads: usize,
    /// Number of listener tasks to use per fd. This allows for parallel accepts.
    pub listener_tasks_per_fd: usize,
    /// Allow work stealing between threads of the same service. Default `true`.
    pub work_stealing: bool,
    /// The path to CA file the SSL library should use. If empty, the default trust store location
    /// defined by the SSL library will be used.
    pub ca_file: Option<String>,
    /// The maximum number of unique s2n configs to cache. Creating a new s2n config is an
    /// expensive operation, so we cache and re-use config objects with identical configurations.
    /// A value of 0 disables the cache.
    ///
    /// WARNING: Disabling the s2n config cache can result in poor performance
    #[cfg(feature = "s2n")]
    pub s2n_config_cache_size: Option<usize>,
    /// Grace period in seconds before starting the final step of the graceful shutdown after signaling shutdown.
    pub grace_period_seconds: Option<u64>,
    /// Timeout in seconds of the final step for the graceful shutdown.
    pub graceful_shutdown_timeout_seconds: Option<u64>,
    // These options don't belong here as they are specific to certain services
    /// IPv4 addresses for a client connector to bind to. See
    /// [`ConnectorOptions`](crate::connectors::ConnectorOptions).
    /// Note: this is an _unstable_ field that may be renamed or removed in the future.
    pub client_bind_to_ipv4: Vec<String>,
    /// IPv6 addresses for a client connector to bind to. See
    /// [`ConnectorOptions`](crate::connectors::ConnectorOptions).
    /// Note: this is an _unstable_ field that may be renamed or removed in the future.
    pub client_bind_to_ipv6: Vec<String>,
    /// Keepalive pool size for client connections to upstream. See
    /// [`ConnectorOptions`](crate::connectors::ConnectorOptions).
    /// Note: this is an _unstable_ field that may be renamed or removed in the future.
    pub upstream_keepalive_pool_size: usize,
    /// Number of dedicated thread pools to use for upstream connection establishment.
    /// See [`ConnectorOptions`](crate::connectors::ConnectorOptions).
    /// Note: this is an _unstable_ field that may be renamed or removed in the future.
    pub upstream_connect_offload_threadpools: Option<usize>,
    /// Number of threads per dedicated upstream connection establishment pool.
    /// See [`ConnectorOptions`](crate::connectors::ConnectorOptions).
    /// Note: this is an _unstable_ field that may be renamed or removed in the future.
    pub upstream_connect_offload_thread_per_pool: Option<usize>,
    /// When enabled allows TLS keys to be written to a file specified by the SSLKEYLOG
    /// env variable. This can be used by tools like Wireshark to decrypt upstream traffic
    /// for debugging purposes.
    /// Note: this is an _unstable_ field that may be renamed or removed in the future.
    pub upstream_debug_ssl_keylog: bool,
    /// The maximum number of retries that will be attempted when an error is
    /// retry-able (`e.retry() == true`) when proxying to upstream.
    ///
    /// This setting is a fail-safe and defaults to 16.
    pub max_retries: usize,
    /// Maximum number of retries for upgrade socket connect and accept operations.
    /// This controls how many times send_fds_to will retry connecting and how many times
    /// get_fds_from will retry accepting during graceful upgrades.
    /// The retry interval is 1 second between attempts.
    /// If not set, defaults to 5 retries.
    pub upgrade_sock_connect_accept_max_retries: Option<usize>,
}

impl Default for ServerConf {
    fn default() -> Self {
        ServerConf {
            version: 0,
            client_bind_to_ipv4: vec![],
            client_bind_to_ipv6: vec![],
            ca_file: None,
            #[cfg(feature = "s2n")]
            s2n_config_cache_size: None,
            daemon: false,
            error_log: None,
            upstream_debug_ssl_keylog: false,
            pid_file: "/tmp/pingora.pid".to_string(),
            upgrade_sock: "/tmp/pingora_upgrade.sock".to_string(),
            user: None,
            group: None,
            threads: 1,
            listener_tasks_per_fd: 1,
            work_stealing: true,
            upstream_keepalive_pool_size: 128,
            upstream_connect_offload_threadpools: None,
            upstream_connect_offload_thread_per_pool: None,
            grace_period_seconds: None,
            graceful_shutdown_timeout_seconds: None,
            max_retries: DEFAULT_MAX_RETRIES,
            upgrade_sock_connect_accept_max_retries: None,
        }
    }
}

/// Command-line options
///
/// Call `Opt::parse_args()` to build this object from the process's command line arguments.
#[derive(Parser, Debug, Default)]
#[clap(name = "basic", long_about = None)]
pub struct Opt {
    /// Whether this server should try to upgrade from a running old server
    #[clap(
        short,
        long,
        help = "This is the base set of command line arguments for a pingora-based service",
        long_help = None
    )]
    pub upgrade: bool,

    /// Whether this server should run in the background
    #[clap(short, long)]
    pub daemon: bool,

    /// Not actually used. This flag is there so that the server is not upset seeing this flag
    /// passed from `cargo test` sometimes
    #[clap(long, hide = true)]
    pub nocapture: bool,

    /// Test the configuration and exit
    ///
    /// When this flag is set, calling `server.bootstrap()` will exit the process without errors
    ///
    /// This flag is useful for upgrading service where the user wants to make sure the new
    /// service can start before shutting down the old server process.
    #[clap(
        short,
        long,
        help = "This flag is useful for upgrading service where the user wants \
                to make sure the new service can start before shutting down \
                the old server process.",
        long_help = None
    )]
    pub test: bool,

    /// The path to the configuration file.
    ///
    /// See [`ServerConf`] for more details of the configuration file.
    #[clap(short, long, help = "The path to the configuration file.", long_help = None)]
    pub conf: Option<String>,
}

impl ServerConf {
    // Does not has to be async until we want runtime reload
    pub fn load_from_yaml<P>(path: P) -> Result<Self>
    where
        P: AsRef<std::path::Path> + std::fmt::Display,
    {
        let conf_str = fs::read_to_string(&path).or_err_with(ReadError, || {
            format!("Unable to read conf file from {path}")
        })?;
        debug!("Conf file read from {path}");
        Self::from_yaml(&conf_str)
    }

    pub fn load_yaml_with_opt_override(opt: &Opt) -> Result<Self> {
        if let Some(path) = &opt.conf {
            let mut conf = Self::load_from_yaml(path)?;
            conf.merge_with_opt(opt);
            Ok(conf)
        } else {
            Error::e_explain(ReadError, "No path specified")
        }
    }

    pub fn new() -> Option<Self> {
        Self::from_yaml("---\nversion: 1").ok()
    }

    pub fn new_with_opt_override(opt: &Opt) -> Option<Self> {
        let conf = Self::new();
        match conf {
            Some(mut c) => {
                c.merge_with_opt(opt);
                Some(c)
            }
            None => None,
        }
    }

    pub fn from_yaml(conf_str: &str) -> Result<Self> {
        trace!("Read conf file: {conf_str}");
        let conf: ServerConf = serde_yaml::from_str(conf_str).or_err_with(ReadError, || {
            format!("Unable to parse yaml conf {conf_str}")
        })?;

        trace!("Loaded conf: {conf:?}");
        conf.validate()
    }

    pub fn to_yaml(&self) -> String {
        serde_yaml::to_string(self).unwrap()
    }

    pub fn validate(self) -> Result<Self> {
        // TODO: do the validation
        Ok(self)
    }

    pub fn merge_with_opt(&mut self, opt: &Opt) {
        if opt.daemon {
            self.daemon = true;
        }
    }
}

/// Create an instance of Opt by parsing the current command-line args.
/// This is equivalent to running `Opt::parse` but does not require the
/// caller to have included the `clap::Parser`
impl Opt {
    pub fn parse_args() -> Self {
        Opt::parse()
    }

    pub fn parse_from_args<I, T>(args: I) -> Self
    where
        I: IntoIterator<Item = T>,
        T: Into<OsString> + Clone,
    {
        Opt::parse_from(args)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[test]
    fn not_a_test_i_cannot_write_yaml_by_hand() {
        init_log();
        let conf = ServerConf {
            version: 1,
            client_bind_to_ipv4: vec!["1.2.3.4".to_string(), "5.6.7.8".to_string()],
            client_bind_to_ipv6: vec![],
            ca_file: None,
            #[cfg(feature = "s2n")]
            s2n_config_cache_size: None,
            daemon: false,
            error_log: None,
            upstream_debug_ssl_keylog: false,
            pid_file: "".to_string(),
            upgrade_sock: "".to_string(),
            user: None,
            group: None,
            threads: 1,
            listener_tasks_per_fd: 1,
            work_stealing: true,
            upstream_keepalive_pool_size: 4,
            upstream_connect_offload_threadpools: None,
            upstream_connect_offload_thread_per_pool: None,
            grace_period_seconds: None,
            graceful_shutdown_timeout_seconds: None,
            max_retries: 1,
            upgrade_sock_connect_accept_max_retries: None,
        };
        // cargo test -- --nocapture not_a_test_i_cannot_write_yaml_by_hand
        println!("{}", conf.to_yaml());
    }

    #[test]
    fn test_load_file() {
        init_log();
        let conf_str = r#"
---
version: 1
client_bind_to_ipv4:
    - 1.2.3.4
    - 5.6.7.8
client_bind_to_ipv6: []
        "#
        .to_string();
        let conf = ServerConf::from_yaml(&conf_str).unwrap();
        assert_eq!(2, conf.client_bind_to_ipv4.len());
        assert_eq!(0, conf.client_bind_to_ipv6.len());
        assert_eq!(1, conf.version);
    }

    #[test]
    fn test_default() {
        init_log();
        let conf_str = r#"
---
version: 1
        "#
        .to_string();
        let conf = ServerConf::from_yaml(&conf_str).unwrap();
        assert_eq!(0, conf.client_bind_to_ipv4.len());
        assert_eq!(0, conf.client_bind_to_ipv6.len());
        assert_eq!(1, conf.version);
        assert_eq!(DEFAULT_MAX_RETRIES, conf.max_retries);
        assert_eq!("/tmp/pingora.pid", conf.pid_file);
    }
}


================================================
FILE: pingora-core/src/server/daemon.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use daemonize::{Daemonize, Stdio};
use log::{debug, error};
use std::ffi::CString;
use std::fs::{self, OpenOptions};
use std::os::unix::prelude::OpenOptionsExt;
use std::path::Path;

use crate::server::configuration::ServerConf;

// Utilities to daemonize a pingora server, i.e. run the process in the background, possibly
// under a different running user and/or group.

// XXX: this operation should have been done when the old service is exiting.
// Now the new pid file just kick the old one out of the way
fn move_old_pid(path: &str) {
    if !Path::new(path).exists() {
        debug!("Old pid file does not exist");
        return;
    }
    let new_path = format!("{path}.old");
    match fs::rename(path, &new_path) {
        Ok(()) => {
            debug!("Old pid file renamed");
        }
        Err(e) => {
            error!(
                "failed to rename pid file from {} to {}: {}",
                path, new_path, e
            );
        }
    }
}

unsafe fn gid_for_username(name: &CString) -> Option<libc::gid_t> {
    let passwd = libc::getpwnam(name.as_ptr() as *const libc::c_char);
    if !passwd.is_null() {
        return Some((*passwd).pw_gid);
    }
    None
}

/// Start a server instance as a daemon.
#[cfg(unix)]
pub fn daemonize(conf: &ServerConf) {
    // TODO: customize working dir

    let daemonize = Daemonize::new()
        .umask(0o007) // allow same group to access files but not everyone else
        .pid_file(&conf.pid_file);

    let daemonize = if let Some(error_log) = conf.error_log.as_ref() {
        let err = OpenOptions::new()
            .append(true)
            .create(true)
            // open read() in case there are no readers
            // available otherwise we will panic with
            // an ENXIO since O_NONBLOCK is set
            .read(true)
            .custom_flags(libc::O_NONBLOCK)
            .open(error_log)
            .unwrap();
        daemonize.stderr(err)
    } else {
        daemonize.stdout(Stdio::keep()).stderr(Stdio::keep())
    };

    let daemonize = match conf.user.as_ref() {
        Some(user) => {
            let user_cstr = CString::new(user.as_str()).unwrap();

            #[cfg(target_os = "macos")]
            let group_id = unsafe { gid_for_username(&user_cstr).map(|gid| gid as i32) };
            #[cfg(target_os = "freebsd")]
            let group_id = unsafe { gid_for_username(&user_cstr).map(|gid| gid as u32) };
            #[cfg(target_os = "linux")]
            let group_id = unsafe { gid_for_username(&user_cstr) };

            daemonize
                .privileged_action(move || {
                    if let Some(gid) = group_id {
                        // Set the supplemental group privileges for the child process.
                        unsafe {
                            libc::initgroups(user_cstr.as_ptr() as *const libc::c_char, gid);
                        }
                    }
                })
                .user(user.as_str())
                .chown_pid_file(true)
        }
        None => daemonize,
    };

    let daemonize = match conf.group.as_ref() {
        Some(group) => daemonize.group(group.as_str()),
        None => daemonize,
    };

    move_old_pid(&conf.pid_file);

    daemonize.start().unwrap(); // hard crash when fail
}


================================================
FILE: pingora-core/src/server/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Server process and configuration management

mod bootstrap_services;
pub mod configuration;
#[cfg(unix)]
mod daemon;
#[cfg(unix)]
pub(crate) mod transfer_fd;

use async_trait::async_trait;
#[cfg(unix)]
use daemon::daemonize;
use daggy::NodeIndex;
use log::{debug, error, info, warn};
use parking_lot::Mutex;
use pingora_runtime::Runtime;
use pingora_timeout::fast_timeout;
#[cfg(feature = "sentry")]
use sentry::ClientOptions;
use std::sync::Arc;
use std::thread;
use std::time::SystemTime;
#[cfg(unix)]
use tokio::signal::unix;
use tokio::sync::{broadcast, watch, Mutex as TokioMutex};
use tokio::time::{sleep, Duration};

use crate::prelude::background_service;
use crate::server::bootstrap_services::{Bootstrap, BootstrapService, SentryInitService};
use crate::services::{
    DependencyGraph, ServiceHandle, ServiceReadyNotifier, ServiceReadyWatch, ServiceWithDependents,
};
use configuration::{Opt, ServerConf};
use std::collections::HashMap;
#[cfg(unix)]
pub use transfer_fd::Fds;

use pingora_error::{Error, ErrorType, Result};

/* Time to wait before exiting the program.
This is the graceful period for all existing sessions to finish */
const EXIT_TIMEOUT: u64 = 60 * 5;
/* Time to wait before shutting down listening sockets.
This is the graceful period for the new service to get ready */
const CLOSE_TIMEOUT: u64 = 5;

enum ShutdownType {
    Graceful,
    Quick,
}

/// Internal wrapper for services with dependency metadata.
pub(crate) struct ServiceWrapper {
    ready_notifier: Option<ServiceReadyNotifier>,
    service: Box<dyn ServiceWithDependents>,
    service_handle: ServiceHandle,
}

/// The execution phase the server is currently in.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum ExecutionPhase {
    /// The server was created, but has not started yet.
    Setup,

    /// Services are being prepared.
    ///
    /// During graceful upgrades this phase acquires the listening FDs from the old process.
    Bootstrap,

    /// Bootstrap has finished, listening FDs have been transferred.
    BootstrapComplete,

    /// The server is running and is listening for shutdown signals.
    Running,

    /// A QUIT signal was received, indicating that a new process wants to take over.
    ///
    /// The server is trying to send the fds to the new process over a Unix socket.
    GracefulUpgradeTransferringFds,

    /// FDs have been sent to the new process.
    /// Waiting a fixed amount of time to allow the new process to take the sockets.
    GracefulUpgradeCloseTimeout,

    /// A TERM signal was received, indicating that the server should shut down gracefully.
    GracefulTerminate,

    /// The server is shutting down.
    ShutdownStarted,

    /// Waiting for the configured grace period to end before shutting down.
    ShutdownGracePeriod,

    /// Wait for runtimes to finish.
    ShutdownRuntimes,

    /// The server has stopped.
    Terminated,
}

/// The receiver for server's shutdown event. The value will turn to true once the server starts
/// to shutdown
pub type ShutdownWatch = watch::Receiver<bool>;
#[cfg(unix)]
pub type ListenFds = Arc<TokioMutex<Fds>>;

/// The type of shutdown process that has been requested.
#[derive(Debug)]
pub enum ShutdownSignal {
    /// Send file descriptors to the new process before starting runtime shutdown with
    /// [ServerConf::graceful_shutdown_timeout_seconds] timeout.
    GracefulUpgrade,
    /// Wait for [ServerConf::grace_period_seconds] before starting runtime shutdown with
    /// [ServerConf::graceful_shutdown_timeout_seconds] timeout.
    GracefulTerminate,
    /// Shutdown with no timeout for runtime shutdown.
    FastShutdown,
}

/// Watcher of a shutdown signal, e.g., [UnixShutdownSignalWatch] for Unix-like
/// platforms.
#[async_trait]
pub trait ShutdownSignalWatch {
    /// Returns the desired shutdown type once one has been requested.
    async fn recv(&self) -> ShutdownSignal;
}

/// A Unix shutdown watcher that awaits for Unix signals.
///
/// - `SIGQUIT`: graceful upgrade
/// - `SIGTERM`: graceful terminate
/// - `SIGINT`: fast shutdown
#[cfg(unix)]
pub struct UnixShutdownSignalWatch;

#[cfg(unix)]
#[async_trait]
impl ShutdownSignalWatch for UnixShutdownSignalWatch {
    async fn recv(&self) -> ShutdownSignal {
        let mut graceful_upgrade_signal = unix::signal(unix::SignalKind::quit()).unwrap();
        let mut graceful_terminate_signal = unix::signal(unix::SignalKind::terminate()).unwrap();
        let mut fast_shutdown_signal = unix::signal(unix::SignalKind::interrupt()).unwrap();

        tokio::select! {
            _ = graceful_upgrade_signal.recv() => {
                ShutdownSignal::GracefulUpgrade
            },
            _ = graceful_terminate_signal.recv() => {
                ShutdownSignal::GracefulTerminate
            },
            _ = fast_shutdown_signal.recv() => {
                ShutdownSignal::FastShutdown
            },
        }
    }
}

/// Arguments to configure running of the pingora server.
pub struct RunArgs {
    /// Signal for initating shutdown
    #[cfg(unix)]
    pub shutdown_signal: Box<dyn ShutdownSignalWatch>,
}

impl Default for RunArgs {
    #[cfg(unix)]
    fn default() -> Self {
        Self {
            shutdown_signal: Box::new(UnixShutdownSignalWatch),
        }
    }

    #[cfg(windows)]
    fn default() -> Self {
        Self {}
    }
}

/// The server object
///
/// This object represents an entire pingora server process which may have multiple independent
/// services (see [crate::services]). The server object handles signals, reading configuration,
/// zero downtime upgrade and error reporting.
pub struct Server {
    // This is a way to add services that have to be run before any others
    // without requiring dependencies to be set directly
    init_services: Vec<Box<dyn ServiceWithDependents + 'static>>,

    services: HashMap<NodeIndex, ServiceWrapper>,
    shutdown_watch: watch::Sender<bool>,
    // TODO: we many want to drop this copy to let sender call closed()
    shutdown_recv: ShutdownWatch,

    /// Tracks the execution phase of the server during upgrades and graceful shutdowns.
    ///
    /// Users can subscribe to the phase with [`Self::watch_execution_phase()`].
    execution_phase_watch: broadcast::Sender<ExecutionPhase>,

    /// Specification of service level dependencies
    dependencies: Arc<Mutex<DependencyGraph>>,

    /// Service initialization
    bootstrap: Arc<Mutex<Bootstrap>>,

    /// The parsed server configuration
    pub configuration: Arc<ServerConf>,
    /// The parser command line options
    pub options: Option<Opt>,
}

// TODO: delete the pid when exit

impl Server {
    /// Acquire a receiver for the server's execution phase.
    ///
    /// The receiver will produce values for each transition.
    pub fn watch_execution_phase(&self) -> broadcast::Receiver<ExecutionPhase> {
        self.execution_phase_watch.subscribe()
    }

    #[cfg(unix)]
    async fn main_loop(&self, run_args: RunArgs) -> ShutdownType {
        // waiting for exit signal

        self.execution_phase_watch
            .send(ExecutionPhase::Running)
            .ok();

        match run_args.shutdown_signal.recv().await {
            ShutdownSignal::FastShutdown => {
                info!("SIGINT received, exiting");
                ShutdownType::Quick
            }
            ShutdownSignal::GracefulTerminate => {
                // we receive a graceful terminate, all instances are instructed to stop
                info!("SIGTERM received, gracefully exiting");
                // graceful shutdown if there are listening sockets
                info!("Broadcasting graceful shutdown");
                match self.shutdown_watch.send(true) {
                    Ok(_) => {
                        info!("Graceful shutdown started!");
                    }
                    Err(e) => {
                        error!("Graceful shutdown broadcast failed: {e}");
                    }
                }
                info!("Broadcast graceful shutdown complete");

                self.execution_phase_watch
                    .send(ExecutionPhase::GracefulTerminate)
                    .ok();

                ShutdownType::Graceful
            }
            ShutdownSignal::GracefulUpgrade => {
                // TODO: still need to select! on signals in case a fast shutdown is needed
                // aka: move below to another task and only kick it off here
                info!("SIGQUIT received, sending socks and gracefully exiting");

                self.execution_phase_watch
                    .send(ExecutionPhase::GracefulUpgradeTransferringFds)
                    .ok();

                if let Some(fds) = self.listen_fds() {
                    let fds = fds.lock().await;
                    info!("Trying to send socks");
                    // XXX: this is blocking IO
                    match fds.send_to_sock(self.configuration.as_ref().upgrade_sock.as_str()) {
                        Ok(_) => {
                            info!("listener sockets sent");
                        }
                        Err(e) => {
                            error!("Unable to send listener sockets to new process: {e}");
                            // sentry log error on fd send failure
                            #[cfg(all(not(debug_assertions), feature = "sentry"))]
                            sentry::capture_error(&e);
                        }
                    }
                    self.execution_phase_watch
                        .send(ExecutionPhase::GracefulUpgradeCloseTimeout)
                        .ok();
                    sleep(Duration::from_secs(CLOSE_TIMEOUT)).await;
                    info!("Broadcasting graceful shutdown");
                    // gracefully exiting
                    match self.shutdown_watch.send(true) {
                        Ok(_) => {
                            info!("Graceful shutdown started!");
                        }
                        Err(e) => {
                            error!("Graceful shutdown broadcast failed: {e}");
                            // switch to fast shutdown
                            return ShutdownType::Graceful;
                        }
                    }
                    info!("Broadcast graceful shutdown complete");
                    ShutdownType::Graceful
                } else {
                    info!("No socks to send, shutting down.");
                    ShutdownType::Graceful
                }
            }
        }
    }

    #[cfg(windows)]
    async fn main_loop(&self, _run_args: RunArgs) -> ShutdownType {
        // waiting for exit signal

        self.execution_phase_watch
            .send(ExecutionPhase::Running)
            .ok();

        match tokio::signal::ctrl_c().await {
            Ok(()) => {
                info!("Ctrl+C received, gracefully exiting");
                // graceful shutdown if there are listening sockets
                info!("Broadcasting graceful shutdown");
                match self.shutdown_watch.send(true) {
                    Ok(_) => {
                        info!("Graceful shutdown started!");
                    }
                    Err(e) => {
                        error!("Graceful shutdown broadcast failed: {e}");
                    }
                }
                info!("Broadcast graceful shutdown complete");

                self.execution_phase_watch
                    .send(ExecutionPhase::GracefulTerminate)
                    .ok();

                ShutdownType::Graceful
            }
            Err(e) => {
                error!("Unable to listen for shutdown signal: {}", e);
                ShutdownType::Quick
            }
        }
    }

    #[cfg(feature = "sentry")]
    #[cfg_attr(docsrs, doc(cfg(feature = "sentry")))]
    /// The Sentry ClientOptions.
    ///
    /// Panics and other events sentry captures will be sent to this DSN **only in release mode**
    pub fn set_sentry_config(&mut self, sentry_config: ClientOptions) {
        self.bootstrap.lock().set_sentry_config(Some(sentry_config));
    }

    /// Get the configured file descriptors for listening
    #[cfg(unix)]
    fn listen_fds(&self) -> Option<ListenFds> {
        self.bootstrap.lock().get_fds()
    }

    #[allow(clippy::too_many_arguments)]
    fn run_service(
        mut service: Box<dyn ServiceWithDependents>,
        #[cfg(unix)] fds: Option<ListenFds>,
        shutdown: ShutdownWatch,
        threads: usize,
        work_stealing: bool,
        listeners_per_fd: usize,
        ready_notifier: ServiceReadyNotifier,
        dependency_watches: Vec<ServiceReadyWatch>,
    ) -> Runtime
// NOTE: we need to keep the runtime outside async since
        // otherwise the runtime will be dropped.
    {
        let service_runtime = Server::create_runtime(service.name(), threads, work_stealing);
        let service_name = service.name().to_string();
        service_runtime.get_handle().spawn(async move {
            // Wait for all dependencies to be ready
            let mut time_waited_opt: Option<Duration> = None;
            for mut watch in dependency_watches {
                let start = SystemTime::now();

                if watch.wait_for(|&ready| ready).await.is_err() {
                    error!(
                        "Service '{}' dependency channel closed before ready",
                        service_name
                    );
                }

                *time_waited_opt.get_or_insert_default() += start.elapsed().unwrap_or_default()
            }

            if let Some(time_waited) = time_waited_opt {
                service.on_startup_delay(time_waited);
            }

            // Start the actual service, passing the ready notifier
            service
                .start_service(
                    #[cfg(unix)]
                    fds,
                    shutdown,
                    listeners_per_fd,
                    ready_notifier,
                )
                .await;
            info!("service '{}' exited.", service_name);
        });
        service_runtime
    }

    /// Create a new [`Server`], using the [`Opt`] and [`ServerConf`] values provided
    ///
    /// This method is intended for pingora frontends that are NOT using the built-in
    /// command line and configuration file parsing, and are instead using their own.
    ///
    /// If a configuration file path is provided as part of `opt`, it will be ignored
    /// and a warning will be logged.
    pub fn new_with_opt_and_conf(raw_opt: impl Into<Option<Opt>>, mut conf: ServerConf) -> Server {
        let opt = raw_opt.into();
        if let Some(opts) = &opt {
            if let Some(c) = opts.conf.as_ref() {
                warn!("Ignoring command line argument using '{c}' as configuration, and using provided configuration instead.");
            }
            conf.merge_with_opt(opts);
        }

        let (tx, rx) = watch::channel(false);

        let execution_phase_watch = broadcast::channel(100).0;
        let bootstrap = Arc::new(Mutex::new(Bootstrap::new(
            &opt,
            &conf,
            &execution_phase_watch,
        )));

        Server {
            services: Default::default(),
            init_services: Default::default(),
            shutdown_watch: tx,
            shutdown_recv: rx,
            execution_phase_watch,
            configuration: Arc::new(conf),
            options: opt,
            dependencies: Arc::new(Mutex::new(DependencyGraph::new())),
            bootstrap,
        }
    }

    /// Create a new [`Server`].
    ///
    /// Only one [`Server`] needs to be created for a process. A [`Server`] can hold multiple
    /// independent services.
    ///
    /// Command line options can either be passed by parsing the command line arguments via
    /// `Opt::parse_args()`, or be generated by other means.
    pub fn new(opt: impl Into<Option<Opt>>) -> Result<Server> {
        let opt = opt.into();
        let (tx, rx) = watch::channel(false);

        let execution_phase_watch = broadcast::channel(100).0;
        let conf = if let Some(opt) = opt.as_ref() {
            opt.conf.as_ref().map_or_else(
                || {
                    // options, no conf, generated
                    ServerConf::new_with_opt_override(opt).ok_or_else(|| {
                        Error::explain(ErrorType::ReadError, "Conf generation failed")
                    })
                },
                |_| {
                    // options and conf loaded
                    ServerConf::load_yaml_with_opt_override(opt)
                },
            )
        } else {
            ServerConf::new()
                .ok_or_else(|| Error::explain(ErrorType::ReadError, "Conf generation failed"))
        }?;

        let bootstrap = Arc::new(Mutex::new(Bootstrap::new(
            &opt,
            &conf,
            &execution_phase_watch,
        )));

        Ok(Server {
            services: Default::default(),
            init_services: Default::default(),
            shutdown_watch: tx,
            shutdown_recv: rx,
            execution_phase_watch,
            configuration: Arc::new(conf),
            options: opt,
            dependencies: Arc::new(Mutex::new(DependencyGraph::new())),
            bootstrap,
        })
    }

    /// Add a service that all other services will wait on before starting.
    fn add_init_service(&mut self, service: impl ServiceWithDependents + 'static) {
        let boxed_service = Box::new(service);
        self.init_services.push(boxed_service);
    }

    /// Add the init services as dependencies for all existing services
    fn apply_init_service_dependencies(&mut self) {
        let services = self
            .services
            .values()
            .map(|service| service.service_handle.clone())
            .collect::<Vec<_>>();
        let global_deps = self
            .init_services
            .drain(..)
            .collect::<Vec<_>>()
            .into_iter()
            .map(|dep| self.add_boxed_service(dep))
            .collect::<Vec<_>>();
        for service in services {
            service.add_dependencies(&global_deps);
        }
    }

    /// Add a service to this server.
    ///
    /// Returns a [`ServiceHandle`] that can be used to declare dependencies.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let db_id = server.add_service(database_service);
    /// let api_id = server.add_service(api_service);
    ///
    /// // Declare that API depends on database
    /// api_id.add_dependency(&db_id);
    /// ```
    pub fn add_service(&mut self, service: impl ServiceWithDependents + 'static) -> ServiceHandle {
        self.add_boxed_service(Box::new(service))
    }

    /// Add a pre-boxed service to this server.
    ///
    /// Returns a [`ServiceHandle`] that can be used to declare dependencies.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let db_id = server.add_service(database_service);
    /// let api_id = server.add_service(api_service);
    ///
    /// // Declare that API depends on database
    /// api_id.add_dependency(&db_id);
    /// ```
    pub fn add_boxed_service(
        &mut self,
        service_box: Box<dyn ServiceWithDependents>,
    ) -> ServiceHandle {
        let name = service_box.name().to_string();

        // Create a readiness notifier for this service
        let (tx, rx) = watch::channel(false);

        let id = self.dependencies.lock().add_node(name.clone(), rx.clone());

        let service_handle = ServiceHandle::new(id, name, rx, &self.dependencies);

        let wrapper = ServiceWrapper {
            ready_notifier: Some(ServiceReadyNotifier::new(tx)),
            service: service_box,
            service_handle: service_handle.clone(),
        };

        self.services.insert(id, wrapper);

        service_handle
    }

    /// Similar to [`Self::add_service()`], but take a list of services.
    ///
    /// Returns a `Vec<ServiceHandle>` for all added services.
    pub fn add_services(
        &mut self,
        services: Vec<Box<dyn ServiceWithDependents>>,
    ) -> Vec<ServiceHandle> {
        services
            .into_iter()
            .map(|service| self.add_boxed_service(service))
            .collect()
    }

    /// Prepare the server to start
    ///
    /// When trying to zero downtime upgrade from an older version of the server which is already
    /// running, this function will try to get all its listening sockets in order to take them over.
    pub fn bootstrap(&mut self) {
        self.bootstrap.lock().bootstrap();
    }

    /// Create a service that will run to prepare the service to start
    ///
    /// The created service will handle the zero-downtime upgrade from an older version of the server
    /// to this one. It will try to get all its listening sockets in order to take them over.
    ///
    /// Other bootstrapping functionality like sentry initialization will also be handled, but as a
    /// service that will complete before any other service starts.
    pub fn bootstrap_as_a_service(&mut self) -> ServiceHandle {
        let bootstrap_service =
            background_service("Bootstrap Service", BootstrapService::new(&self.bootstrap));

        let sentry_service = background_service(
            "Sentry Init Service",
            SentryInitService::new(&self.bootstrap),
        );

        self.add_init_service(sentry_service);

        self.add_service(bootstrap_service)
    }

    /// Start the server using [Self::run] and default [RunArgs].
    ///
    /// This function will block forever until the server needs to quit. So this would be the last
    /// function to call for this object.
    ///
    /// Note: this function may fork the process for daemonization, so any additional threads created
    /// before this function will be lost to any service logic once this function is called.
    pub fn run_forever(self) -> ! {
        self.run(RunArgs::default());

        std::process::exit(0)
    }

    /// Run the server until execution finished.
    ///
    /// This function will run until the server has been instructed to shut down
    /// through a signal, and will then wait for all services to finish and
    /// runtimes to exit.
    ///
    /// Note: if daemonization is enabled in the config, this function will
    /// never return.
    /// Instead it will either start the daemon process and exit, or panic
    /// if daemonization fails.
    pub fn run(mut self, run_args: RunArgs) {
        self.apply_init_service_dependencies();

        info!("Server starting");

        let conf = self.configuration.as_ref();

        #[cfg(unix)]
        if conf.daemon {
            info!("Daemonizing the server");
            fast_timeout::pause_for_fork();
            daemonize(&self.configuration);
            fast_timeout::unpause();
        }

        #[cfg(windows)]
        if conf.daemon {
            panic!("Daemonizing under windows is not supported");
        }

        // Holds tuples of runtimes and their service name.
        let mut runtimes: Vec<(Runtime, String)> = Vec::new();

        // Get services in topological order (dependencies first)
        let startup_order = match self.dependencies.lock().topological_sort() {
            Ok(order) => order,
            Err(e) => {
                error!("Failed to determine service startup order: {}", e);
                std::process::exit(1);
            }
        };

        // Log service names in startup order
        let service_names: Vec<String> = startup_order
            .iter()
            .map(|(_, service)| service.name.clone())
            .collect();
        info!("Starting services in dependency order: {:?}", service_names);

        // Start services in dependency order
        for (service_id, service) in startup_order {
            let mut wrapper = match self.services.remove(&service_id) {
                Some(w) => w,
                None => {
                    warn!(
                        "Service ID {:?}-{} in startup order but not found",
                        service_id, service.name
                    );
                    continue;
                }
            };

            let threads = wrapper.service.threads().unwrap_or(conf.threads);
            let name = wrapper.service.name().to_string();

            // Extract dependency watches from the ServiceHandle
            let dependencies = self
                .dependencies
                .lock()
                .get_dependencies(wrapper.service_handle.id);

            // Get the readiness notifier for this service by taking it from the Option.
            // Since service_id is the index, we can directly access it.
            // We take() the notifier, leaving None in its place.
            let ready_notifier = wrapper
                .ready_notifier
                .take()
                .expect("Service notifier should exist");

            if !dependencies.is_empty() {
                info!(
                    "Service '{name}' will wait for dependencies: {:?}",
                    dependencies.iter().map(|s| &s.name).collect::<Vec<_>>()
                );
            } else {
                info!("Starting service: {}", name);
            }

            let dependency_watches = dependencies
                .iter()
                .map(|s| s.ready_watch.clone())
                .collect::<Vec<_>>();

            let runtime = Server::run_service(
                wrapper.service,
                #[cfg(unix)]
                self.listen_fds(),
                self.shutdown_recv.clone(),
                threads,
                conf.work_stealing,
                self.configuration.listener_tasks_per_fd,
                ready_notifier,
                dependency_watches,
            );
            runtimes.push((runtime, name));
        }

        // blocked on main loop so that it runs forever
        // Only work steal runtime can use block_on()
        let server_runtime = Server::create_runtime("Server", 1, true);
        #[cfg(unix)]
        let shutdown_type = server_runtime
            .get_handle()
            .block_on(self.main_loop(run_args));
        #[cfg(windows)]
        let shutdown_type = server_runtime
            .get_handle()
            .block_on(self.main_loop(run_args));

        self.execution_phase_watch
            .send(ExecutionPhase::ShutdownStarted)
            .ok();

        if matches!(shutdown_type, ShutdownType::Graceful) {
            self.execution_phase_watch
                .send(ExecutionPhase::ShutdownGracePeriod)
                .ok();

            let exit_timeout = self
                .configuration
                .as_ref()
                .grace_period_seconds
                .unwrap_or(EXIT_TIMEOUT);
            info!("Graceful shutdown: grace period {}s starts", exit_timeout);
            thread::sleep(Duration::from_secs(exit_timeout));
            info!("Graceful shutdown: grace period ends");
        }

        // Give tokio runtimes time to exit
        let shutdown_timeout = match shutdown_type {
            ShutdownType::Quick => Duration::from_secs(0),
            ShutdownType::Graceful => Duration::from_secs(
                self.configuration
                    .as_ref()
                    .graceful_shutdown_timeout_seconds
                    .unwrap_or(5),
            ),
        };

        self.execution_phase_watch
            .send(ExecutionPhase::ShutdownRuntimes)
            .ok();

        let shutdowns: Vec<_> = runtimes
            .into_iter()
            .map(|(rt, name)| {
                info!("Waiting for runtimes to exit!");
                let join = thread::spawn(move || {
                    rt.shutdown_timeout(shutdown_timeout);
                    thread::sleep(shutdown_timeout)
                });
                (join, name)
            })
            .collect();
        for (shutdown, name) in shutdowns {
            info!("Waiting for service runtime {} to exit", name);
            if let Err(e) = shutdown.join() {
                error!("Failed to shutdown service runtime {}: {:?}", name, e);
            }
            debug!("Service runtime {} has exited", name);
        }
        info!("All runtimes exited, exiting now");

        self.execution_phase_watch
            .send(ExecutionPhase::Terminated)
            .ok();
    }

    fn create_runtime(name: &str, threads: usize, work_steal: bool) -> Runtime {
        if work_steal {
            Runtime::new_steal(threads, name)
        } else {
            Runtime::new_no_steal(threads, name)
        }
    }
}


================================================
FILE: pingora-core/src/server/transfer_fd/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(target_os = "linux")]
use log::{debug, error, warn};
use nix::errno::Errno;
#[cfg(target_os = "linux")]
use nix::sys::socket::{self, AddressFamily, RecvMsg, SockFlag, SockType, UnixAddr};
#[cfg(target_os = "linux")]
use nix::sys::stat;
use nix::{Error, NixPath};
use std::collections::HashMap;
use std::io::Write;
#[cfg(target_os = "linux")]
use std::io::{IoSlice, IoSliceMut};
use std::os::unix::io::RawFd;
#[cfg(target_os = "linux")]
use std::{thread, time};

// Utilities to transfer file descriptors between sockets, e.g. during graceful upgrades.

/// Container for open file descriptors and their associated bind addresses.
pub struct Fds {
    map: HashMap<String, RawFd>,
}

impl Fds {
    pub fn new() -> Self {
        Fds {
            map: HashMap::new(),
        }
    }

    pub fn add(&mut self, bind: String, fd: RawFd) {
        self.map.insert(bind, fd);
    }

    pub fn get(&self, bind: &str) -> Option<&RawFd> {
        self.map.get(bind)
    }

    pub fn serialize(&self) -> (Vec<String>, Vec<RawFd>) {
        self.map.iter().map(|(key, val)| (key.clone(), val)).unzip()
    }

    pub fn deserialize(&mut self, binds: Vec<String>, fds: Vec<RawFd>) {
        assert_eq!(binds.len(), fds.len());
        for (bind, fd) in binds.into_iter().zip(fds) {
            self.map.insert(bind, fd);
        }
    }

    pub fn send_to_sock<P>(&self, path: &P) -> Result<usize, Error>
    where
        P: ?Sized + NixPath + std::fmt::Display,
    {
        let (vec_key, vec_fds) = self.serialize();
        let mut ser_buf: [u8; 2048] = [0; 2048];
        let ser_key_size = serialize_vec_string(&vec_key, &mut ser_buf);
        send_fds_to(vec_fds, &ser_buf[..ser_key_size], path, None)
    }

    pub fn get_from_sock<P>(&mut self, path: &P) -> Result<(), Error>
    where
        P: ?Sized + NixPath + std::fmt::Display,
    {
        let mut de_buf: [u8; 2048] = [0; 2048];
        let (fds, bytes) = get_fds_from(path, &mut de_buf, None)?;
        let keys = deserialize_vec_string(&de_buf[..bytes])?;
        self.deserialize(keys, fds);
        Ok(())
    }
}

fn serialize_vec_string(vec_string: &[String], mut buf: &mut [u8]) -> usize {
    // There are many ways to do this. Serde is probably the way to go
    // But let's start with something simple: space separated strings
    let joined = vec_string.join(" ");
    // TODO: check the buf is large enough
    buf.write(joined.as_bytes()).unwrap()
}

fn deserialize_vec_string(buf: &[u8]) -> Result<Vec<String>, Error> {
    let joined = std::str::from_utf8(buf).map_err(|_| Error::EINVAL)?;
    Ok(joined.split_ascii_whitespace().map(String::from).collect())
}

#[cfg(target_os = "linux")]
pub fn get_fds_from<P>(
    path: &P,
    payload: &mut [u8],
    max_retry: Option<usize>,
) -> Result<(Vec<RawFd>, usize), Error>
where
    P: ?Sized + NixPath + std::fmt::Display,
{
    let max_retry = max_retry.unwrap_or(MAX_RETRY);
    const MAX_FDS: usize = 32;

    let listen_fd = socket::socket(
        AddressFamily::Unix,
        SockType::Stream,
        SockFlag::SOCK_NONBLOCK,
        None,
    )
    .unwrap();
    let unix_addr = UnixAddr::new(path).unwrap();
    // clean up old sock
    match nix::unistd::unlink(path) {
        Ok(()) => {
            debug!("unlink {} done", path);
        }
        Err(e) => {
            // Normal if file does not exist
            debug!("unlink {} failed: {}", path, e);
            // TODO: warn if exist but not able to unlink
        }
    };
    socket::bind(listen_fd, &unix_addr).unwrap();

    /* sock is created before we change user, need to give permission */
    stat::fchmodat(
        None,
        path,
        stat::Mode::from_bits_truncate(0o666),
        stat::FchmodatFlags::FollowSymlink,
    )
    .unwrap();

    socket::listen(listen_fd, 8).unwrap();

    let fd = match accept_with_retry_timeout(listen_fd, max_retry) {
        Ok(fd) => fd,
        Err(e) => {
            error!("Giving up reading socket from: {path}, error: {e:?}");
            //cleanup
            if nix::unistd::close(listen_fd).is_ok() {
                nix::unistd::unlink(path).unwrap();
            }
            return Err(e);
        }
    };

    let mut io_vec = [IoSliceMut::new(payload); 1];
    let mut cmsg_buf = nix::cmsg_space!([RawFd; MAX_FDS]);
    let msg: RecvMsg<UnixAddr> = socket::recvmsg(
        fd,
        &mut io_vec,
        Some(&mut cmsg_buf),
        socket::MsgFlags::empty(),
    )
    .unwrap();

    let mut fds: Vec<RawFd> = Vec::new();
    for cmsg in msg.cmsgs() {
        if let socket::ControlMessageOwned::ScmRights(mut vec_fds) = cmsg {
            fds.append(&mut vec_fds)
        } else {
            warn!("Unexpected control messages: {cmsg:?}")
        }
    }

    //cleanup
    if nix::unistd::close(listen_fd).is_ok() {
        nix::unistd::unlink(path).unwrap();
    }

    Ok((fds, msg.bytes))
}

#[cfg(not(target_os = "linux"))]
pub fn get_fds_from<P>(
    _path: &P,
    _payload: &mut [u8],
    _max_retry: Option<usize>,
) -> Result<(Vec<RawFd>, usize), Error>
where
    P: ?Sized + NixPath + std::fmt::Display,
{
    log::error!("Upgrade is not currently supported outside of Linux platforms");
    Err(Errno::ECONNREFUSED)
}

#[cfg(target_os = "linux")]
const MAX_RETRY: usize = 5;
#[cfg(target_os = "linux")]
const RETRY_INTERVAL: time::Duration = time::Duration::from_secs(1);

#[cfg(target_os = "linux")]
fn accept_with_retry_timeout(listen_fd: i32, max_retry: usize) -> Result<i32, Error> {
    let mut retried = 0;
    loop {
        match socket::accept(listen_fd) {
            Ok(fd) => return Ok(fd),
            Err(e) => {
                if retried > max_retry {
                    return Err(e);
                }
                match e {
                    Errno::EAGAIN => {
                        error!(
                            "No incoming socket transfer, sleep {RETRY_INTERVAL:?} and try again"
                        );
                        retried += 1;
                        thread::sleep(RETRY_INTERVAL);
                    }
                    _ => {
                        error!("Error accepting socket transfer: {e}");
                        return Err(e);
                    }
                }
            }
        }
    }
}

#[cfg(target_os = "linux")]
pub fn send_fds_to<P>(
    fds: Vec<RawFd>,
    payload: &[u8],
    path: &P,
    max_retry: Option<usize>,
) -> Result<usize, Error>
where
    P: ?Sized + NixPath + std::fmt::Display,
{
    let max_retry = max_retry.unwrap_or(MAX_RETRY);
    const MAX_NONBLOCKING_POLLS: usize = 20;
    const NONBLOCKING_POLL_INTERVAL: time::Duration = time::Duration::from_millis(500);

    let send_fd = socket::socket(
        AddressFamily::Unix,
        SockType::Stream,
        SockFlag::SOCK_NONBLOCK,
        None,
    )?;
    let unix_addr = UnixAddr::new(path)?;
    let mut retried = 0;
    let mut nonblocking_polls = 0;

    let conn_result: Result<usize, Error> = loop {
        match socket::connect(send_fd, &unix_addr) {
            Ok(_) => break Ok(0),
            Err(e) => match e {
                /* If the new process hasn't created the upgrade sock we'll get an ENOENT.
                ECONNREFUSED may happen if the sock wasn't cleaned up
                and the old process tries sending before the new one is listening.
                EACCES may happen if connect() happen before the correct permission is set */
                Errno::ENOENT | Errno::ECONNREFUSED | Errno::EACCES => {
                    /*the server is not ready yet*/
                    retried += 1;
                    if retried > max_retry {
                        error!(
                            "Max retry: {} reached. Giving up sending socket to: {}, error: {:?}",
                            max_retry, path, e
                        );
                        break Err(e);
                    }
                    warn!("server not ready, will try again in {RETRY_INTERVAL:?}");
                    thread::sleep(RETRY_INTERVAL);
                }
                /* handle nonblocking IO */
                Errno::EINPROGRESS => {
                    nonblocking_polls += 1;
                    if nonblocking_polls >= MAX_NONBLOCKING_POLLS {
                        error!("Connect() not ready after retries when sending socket to: {path}",);
                        break Err(e);
                    }
                    warn!("Connect() not ready, will try again in {NONBLOCKING_POLL_INTERVAL:?}",);
                    thread::sleep(NONBLOCKING_POLL_INTERVAL);
                }
                _ => {
                    error!("Error sending socket to: {path}, error: {e:?}");
                    break Err(e);
                }
            },
        }
    };

    let result = match conn_result {
        Ok(_) => {
            let io_vec = [IoSlice::new(payload); 1];
            let scm = socket::ControlMessage::ScmRights(fds.as_slice());
            let cmsg = [scm; 1];
            loop {
                match socket::sendmsg(
                    send_fd,
                    &io_vec,
                    &cmsg,
                    socket::MsgFlags::empty(),
                    None::<&UnixAddr>,
                ) {
                    Ok(result) => break Ok(result),
                    Err(e) => match e {
                        /* handle nonblocking IO */
                        Errno::EAGAIN => {
                            nonblocking_polls += 1;
                            if nonblocking_polls >= MAX_NONBLOCKING_POLLS {
                                error!(
                                    "Sendmsg() not ready after retries when sending socket to: {}",
                                    path
                                );
                                break Err(e);
                            }
                            warn!(
                                "Sendmsg() not ready, will try again in {:?}",
                                NONBLOCKING_POLL_INTERVAL
                            );
                            thread::sleep(NONBLOCKING_POLL_INTERVAL);
                        }
                        _ => break Err(e),
                    },
                }
            }
        }
        Err(_) => conn_result,
    };

    nix::unistd::close(send_fd).unwrap();
    result
}

#[cfg(not(target_os = "linux"))]
pub fn send_fds_to<P>(
    _fds: Vec<RawFd>,
    _payload: &[u8],
    _path: &P,
    _max_retry: Option<usize>,
) -> Result<usize, Error>
where
    P: ?Sized + NixPath + std::fmt::Display,
{
    Ok(0)
}

#[cfg(test)]
#[cfg(target_os = "linux")]
mod tests {
    use super::*;
    use log::{debug, error};

    fn init_log() {
        let _ = env_logger::builder().is_test(true).try_init();
    }

    #[test]
    fn test_add_get() {
        init_log();
        let mut fds = Fds::new();
        let key = "1.1.1.1:80".to_string();
        fds.add(key.clone(), 128);
        assert_eq!(128, *fds.get(&key).unwrap());
    }

    #[test]
    fn test_table_serde() {
        init_log();
        let mut fds = Fds::new();
        let key1 = "1.1.1.1:80".to_string();
        fds.add(key1.clone(), 128);
        let key2 = "1.1.1.1:443".to_string();
        fds.add(key2.clone(), 129);

        let (k, v) = fds.serialize();
        let mut fds2 = Fds::new();
        fds2.deserialize(k, v);

        assert_eq!(128, *fds2.get(&key1).unwrap());
        assert_eq!(129, *fds2.get(&key2).unwrap());
    }

    #[test]
    fn test_vec_string_serde() {
        init_log();
        let vec_str: Vec<String> = vec!["aaaa".to_string(), "bbb".to_string()];
        let mut ser_buf: [u8; 1024] = [0; 1024];
        let size = serialize_vec_string(&vec_str, &mut ser_buf);
        let de_vec_string = deserialize_vec_string(&ser_buf[..size]).unwrap();
        assert_eq!(de_vec_string.len(), 2);
        assert_eq!(de_vec_string[0], "aaaa");
        assert_eq!(de_vec_string[1], "bbb");
    }

    #[test]
    fn test_send_receive_fds() {
        init_log();
        let dumb_fd = socket::socket(
            AddressFamily::Unix,
            SockType::Stream,
            SockFlag::empty(),
            None,
        )
        .unwrap();

        // receiver need to start in another thread since it is blocking
        let child = thread::spawn(move || {
            let mut buf: [u8; 32] = [0; 32];
            let (fds, bytes) =
                get_fds_from("/tmp/pingora_fds_receive.sock", &mut buf, None).unwrap();
            debug!("{:?}", fds);
            assert_eq!(1, fds.len());
            assert_eq!(32, bytes);
            assert_eq!(1, buf[0]);
            assert_eq!(1, buf[31]);
        });

        let fds = vec![dumb_fd];
        let buf: [u8; 128] = [1; 128];
        match send_fds_to(fds, &buf, "/tmp/pingora_fds_receive.sock", None) {
            Ok(sent) => {
                assert!(sent > 0);
            }
            Err(e) => {
                error!("{:?}", e);
                panic!()
            }
        }

        child.join().unwrap();
    }

    #[test]
    fn test_serde_via_socket() {
        init_log();
        let mut fds = Fds::new();
        let key1 = "1.1.1.1:80".to_string();
        let dumb_fd1 = socket::socket(
            AddressFamily::Unix,
            SockType::Stream,
            SockFlag::empty(),
            None,
        )
        .unwrap();
        fds.add(key1.clone(), dumb_fd1);
        let key2 = "1.1.1.1:443".to_string();
        let dumb_fd2 = socket::socket(
            AddressFamily::Unix,
            SockType::Stream,
            SockFlag::empty(),
            None,
        )
        .unwrap();
        fds.add(key2.clone(), dumb_fd2);

        let child = thread::spawn(move || {
            let mut fds2 = Fds::new();
            fds2.get_from_sock("/tmp/pingora_fds_receive2.sock")
                .unwrap();
            assert!(*fds2.get(&key1).unwrap() > 0);
            assert!(*fds2.get(&key2).unwrap() > 0);
        });

        fds.send_to_sock("/tmp/pingora_fds_receive2.sock").unwrap();
        child.join().unwrap();
    }

    #[test]
    fn test_send_fds_to_respects_configurable_timeout() {
        init_log();
        use std::time::Instant;

        let dumb_fd = socket::socket(
            AddressFamily::Unix,
            SockType::Stream,
            SockFlag::empty(),
            None,
        )
        .unwrap();

        let fds = vec![dumb_fd];
        let buf: [u8; 32] = [1; 32];

        // Try to send with a custom max_retries of 2
        let start = Instant::now();
        let result = send_fds_to(fds, &buf, "/tmp/pingora_test_config_send.sock", Some(2));
        let elapsed = start.elapsed();

        // Should fail after 2 retries with RETRY_INTERVAL (1 second) between each
        // Total time should be approximately 2 seconds
        assert!(result.is_err());
        assert!(
            elapsed.as_secs() >= 2,
            "Expected at least 2 seconds, got {:?}",
            elapsed
        );
        assert!(
            elapsed.as_secs() < 4,
            "Expected less than 4 seconds, got {:?}",
            elapsed
        );
    }

    #[test]
    fn test_get_fds_from_respects_configurable_timeout() {
        init_log();
        use std::time::Instant;

        let mut buf: [u8; 32] = [0; 32];

        // Try to receive with a custom max_retries of 2
        let start = Instant::now();
        let result = get_fds_from("/tmp/pingora_test_config_receive.sock", &mut buf, Some(2));
        let elapsed = start.elapsed();

        // Should fail after 2 retries with RETRY_INTERVAL (1 second) between each
        // Total time should be approximately 2 seconds
        assert!(result.is_err());
        assert!(
            elapsed.as_secs() >= 2,
            "Expected at least 2 seconds, got {:?}",
            elapsed
        );
        assert!(
            elapsed.as_secs() < 4,
            "Expected less than 4 seconds, got {:?}",
            elapsed
        );
    }
}


================================================
FILE: pingora-core/src/services/background.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The background service
//!
//! A [BackgroundService] can be run as part of a Pingora application to add supporting logic that
//! exists outside of the request/response lifecycle.
//! Examples might include service discovery (load balancing) and background updates such as
//! push-style metrics.

use async_trait::async_trait;
use std::sync::Arc;

use super::{ServiceReadyNotifier, ServiceWithDependents};
#[cfg(unix)]
use crate::server::ListenFds;
use crate::server::ShutdownWatch;

/// The background service interface
///
/// You can implement a background service with or without the ready notifier,
/// but you shouldn't implement both. Under the hood, the pingora service will
/// call the `start_with_ready_notifier` function. By default this function will
/// call the regular `start` function.
#[async_trait]
pub trait BackgroundService {
    /// This function is called when the pingora server tries to start all the
    /// services. The background service should signal readiness by calling
    /// `ready_notifier.notify_ready()` once initialization is complete.
    /// The service can return at anytime or wait for the `shutdown` signal.
    ///
    /// By default this method will immediately signal readiness and call
    /// through to the regular `start` function
    async fn start_with_ready_notifier(
        &self,
        shutdown: ShutdownWatch,
        ready_notifier: ServiceReadyNotifier,
    ) {
        ready_notifier.notify_ready();
        self.start(shutdown).await;
    }

    /// This function is called when the pingora server tries to start all the
    /// services. The background service can return at anytime or wait for the
    /// `shutdown` signal.
    async fn start(&self, mut _shutdown: ShutdownWatch) {}
}

/// A generic type of background service
pub struct GenBackgroundService<A> {
    // Name of the service
    name: String,
    // Task the service will execute
    task: Arc<A>,
    /// The number of threads. Default is 1
    pub threads: Option<usize>,
}

impl<A> GenBackgroundService<A> {
    /// Generates a background service that can run in the pingora runtime
    pub fn new(name: String, task: Arc<A>) -> Self {
        Self {
            name,
            task,
            threads: Some(1),
        }
    }

    /// Return the task behind [Arc] to be shared other logic.
    pub fn task(&self) -> Arc<A> {
        self.task.clone()
    }
}

#[async_trait]
impl<A> ServiceWithDependents for GenBackgroundService<A>
where
    A: BackgroundService + Send + Sync + 'static,
{
    // Use default start_service implementation which signals ready immediately
    // and then calls start_service

    async fn start_service(
        &mut self,
        #[cfg(unix)] _fds: Option<ListenFds>,
        shutdown: ShutdownWatch,
        _listeners_per_fd: usize,
        ready: ServiceReadyNotifier,
    ) {
        self.task.start_with_ready_notifier(shutdown, ready).await;
    }

    fn name(&self) -> &str {
        &self.name
    }

    fn threads(&self) -> Option<usize> {
        self.threads
    }
}

/// Helper function to create a background service with a human readable name
pub fn background_service<SV>(name: &str, task: SV) -> GenBackgroundService<SV> {
    GenBackgroundService::new(format!("BG {name}"), Arc::new(task))
}


================================================
FILE: pingora-core/src/services/listening.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The listening service
//!
//! A [Service] (listening service) responds to incoming requests on its endpoints.
//! Each [Service] can be configured with custom application logic (e.g. an `HTTPProxy`) and one or
//! more endpoints to listen to.

use crate::apps::ServerApp;
use crate::listeners::tls::TlsSettings;
#[cfg(feature = "connection_filter")]
use crate::listeners::AcceptAllFilter;
use crate::listeners::{
    ConnectionFilter, Listeners, ServerAddress, TcpSocketOptions, TransportStack,
};
use crate::protocols::Stream;
#[cfg(unix)]
use crate::server::ListenFds;
use crate::server::ShutdownWatch;
use crate::services::Service as ServiceTrait;

use async_trait::async_trait;
use log::{debug, error, info};
use pingora_error::Result;
use pingora_runtime::current_handle;
use pingora_timeout::timeout;
use std::fs::Permissions;
use std::sync::Arc;
use std::time::Duration;

/// The type of service that is associated with a list of listening endpoints and a particular application
pub struct Service<A> {
    name: String,
    listeners: Listeners,
    app_logic: Option<A>,
    /// The number of preferred threads. `None` to follow global setting.
    pub threads: Option<usize>,
    #[cfg(feature = "connection_filter")]
    connection_filter: Arc<dyn ConnectionFilter>,
}

impl<A> Service<A> {
    /// Create a new [`Service`] with the given application (see [`crate::apps`]).
    pub fn new(name: String, app_logic: A) -> Self {
        Service {
            name,
            listeners: Listeners::new(),
            app_logic: Some(app_logic),
            threads: None,
            #[cfg(feature = "connection_filter")]
            connection_filter: Arc::new(AcceptAllFilter),
        }
    }

    /// Create a new [`Service`] with the given application (see [`crate::apps`]) and the given
    /// [`Listeners`].
    pub fn with_listeners(name: String, listeners: Listeners, app_logic: A) -> Self {
        Service {
            name,
            listeners,
            app_logic: Some(app_logic),
            threads: None,
            #[cfg(feature = "connection_filter")]
            connection_filter: Arc::new(AcceptAllFilter),
        }
    }

    /// Set a custom connection filter for this service.
    ///
    /// The connection filter will be applied to all incoming connections
    /// on all endpoints of this service. Connections that don't pass the
    /// filter will be dropped immediately at the TCP level, before TLS
    /// handshake or any HTTP processing.
    ///
    /// # Feature Flag
    ///
    /// This method requires the `connection_filter` feature to be enabled.
    /// When the feature is disabled, this method is a no-op.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use std::sync::Arc;
    /// # use pingora_core::listeners::{ConnectionFilter, AcceptAllFilter};
    /// # struct MyService;
    /// # impl MyService {
    /// #   fn new() -> Self { MyService }
    /// # }
    /// let mut service = MyService::new();
    /// let filter = Arc::new(AcceptAllFilter);
    /// service.set_connection_filter(filter);
    /// ```
    #[cfg(feature = "connection_filter")]
    pub fn set_connection_filter(&mut self, filter: Arc<dyn ConnectionFilter>) {
        self.connection_filter = filter.clone();
        self.listeners.set_connection_filter(filter);
    }

    #[cfg(not(feature = "connection_filter"))]
    pub fn set_connection_filter(&mut self, _filter: Arc<dyn ConnectionFilter>) {}

    /// Get the [`Listeners`], mostly to add more endpoints.
    pub fn endpoints(&mut self) -> &mut Listeners {
        &mut self.listeners
    }

    // the follow add* function has no effect if the server is already started

    /// Add a TCP listening endpoint with the given address (e.g., `127.0.0.1:8000`).
    pub fn add_tcp(&mut self, addr: &str) {
        self.listeners.add_tcp(addr);
    }

    /// Add a TCP listening endpoint with the given [`TcpSocketOptions`].
    pub fn add_tcp_with_settings(&mut self, addr: &str, sock_opt: TcpSocketOptions) {
        self.listeners.add_tcp_with_settings(addr, sock_opt);
    }

    /// Add a Unix domain socket listening endpoint with the given path.
    ///
    /// Optionally take a permission of the socket file. The default is read and write access for
    /// everyone (0o666).
    #[cfg(unix)]
    pub fn add_uds(&mut self, addr: &str, perm: Option<Permissions>) {
        self.listeners.add_uds(addr, perm);
    }

    /// Add a TLS listening endpoint with the given certificate and key paths.
    pub fn add_tls(&mut self, addr: &str, cert_path: &str, key_path: &str) -> Result<()> {
        self.listeners.add_tls(addr, cert_path, key_path)
    }

    /// Add a TLS listening endpoint with the given [`TlsSettings`] and [`TcpSocketOptions`].
    pub fn add_tls_with_settings(
        &mut self,
        addr: &str,
        sock_opt: Option<TcpSocketOptions>,
        settings: TlsSettings,
    ) {
        self.listeners
            .add_tls_with_settings(addr, sock_opt, settings)
    }

    /// Add an endpoint according to the given [`ServerAddress`]
    pub fn add_address(&mut self, addr: ServerAddress) {
        self.listeners.add_address(addr);
    }

    /// Get a reference to the application inside this service
    pub fn app_logic(&self) -> Option<&A> {
        self.app_logic.as_ref()
    }

    /// Get a mutable reference to the application inside this service
    pub fn app_logic_mut(&mut self) -> Option<&mut A> {
        self.app_logic.as_mut()
    }
}

impl<A: ServerApp + Send + Sync + 'static> Service<A> {
    pub async fn handle_event(event: Stream, app_logic: Arc<A>, shutdown: ShutdownWatch) {
        debug!("new event!");
        let mut reuse_event = app_logic.process_new(event, &shutdown).await;
        while let Some(event) = reuse_event {
            // TODO: with no steal runtime, consider spawn() the next event on
            // another thread for more evenly load balancing
            debug!("new reusable event!");
            reuse_event = app_logic.process_new(event, &shutdown).await;
        }
    }

    async fn run_endpoint(
        app_logic: Arc<A>,
        mut stack: TransportStack,
        mut shutdown: ShutdownWatch,
    ) {
        // the accept loop, until the system is shutting down
        loop {
            let new_io = tokio::select! { // TODO: consider biased for perf reason?
                new_io = stack.accept() => new_io,
                shutdown_signal = shutdown.changed() => {
                    match shutdown_signal {
                        Ok(()) => {
                            if !*shutdown.borrow() {
                                // happen in the initial read
                                continue;
                            }
                            info!("Shutting down {}", stack.as_str());
                            break;
                        }
                        Err(e) => {
                            error!("shutdown_signal error {e}");
                            break;
                        }
                    }
                }
            };
            match new_io {
                Ok(io) => {
                    let app = app_logic.clone();
                    let shutdown = shutdown.clone();
                    current_handle().spawn(async move {
                        let peer_addr = io.peer_addr();
                        match timeout(Duration::from_secs(60), io.handshake()).await {
                            Ok(handshake) => {
                                match handshake {
                                    Ok(io) => Self::handle_event(io, app, shutdown).await,
                                    Err(e) => {
                                        // TODO: Maybe IOApp trait needs a fn to handle/filter out this error
                                        if let Some(addr) = peer_addr {
                                            error!("Downstream handshake error from {}: {e}", addr);
                                        } else {
                                            error!("Downstream handshake error: {e}");
                                        }
                                    }
                                }
                            }
                            Err(_) => {
                                error!("Downstream handshake timeout");
                            }
                        }
                    });
                }
                Err(e) => {
                    error!("Accept() failed {e}");
                    if let Some(io_error) = e
                        .root_cause()
                        .downcast_ref::<std::io::Error>()
                        .and_then(|e| e.raw_os_error())
                    {
                        // 24: too many open files. In this case accept() will continue return this
                        // error without blocking, which could use up all the resources
                        if io_error == 24 {
                            // call sleep to calm the thread down and wait for others to release
                            // some resources
                            tokio::time::sleep(std::time::Duration::from_secs(1)).await;
                        }
                    }
                }
            }
        }

        stack.cleanup();
    }
}

#[async_trait]
impl<A: ServerApp + Send + Sync + 'static> ServiceTrait for Service<A> {
    async fn start_service(
        &mut self,
        #[cfg(unix)] fds: Option<ListenFds>,
        shutdown: ShutdownWatch,
        listeners_per_fd: usize,
    ) {
        let runtime = current_handle();
        let endpoints = self
            .listeners
            .build(
                #[cfg(unix)]
                fds,
            )
            .await
            .expect("Failed to build listeners");

        let app_logic = self
            .app_logic
            .take()
            .expect("can only start_service() once");
        let app_logic = Arc::new(app_logic);

        let mut handlers = Vec::new();

        endpoints.into_iter().for_each(|endpoint| {
            for _ in 0..listeners_per_fd {
                let shutdown = shutdown.clone();
                let my_app_logic = app_logic.clone();
                let endpoint = endpoint.clone();

                let jh = runtime.spawn(async move {
                    Self::run_endpoint(my_app_logic, endpoint, shutdown).await;
                });

                handlers.push(jh);
            }
        });

        futures::future::join_all(handlers).await;
        self.listeners.cleanup();
        app_logic.cleanup().await;
    }

    fn name(&self) -> &str {
        &self.name
    }

    fn threads(&self) -> Option<usize> {
        self.threads
    }
}

use crate::apps::prometheus_http_app::PrometheusServer;

impl Service<PrometheusServer> {
    /// The Prometheus HTTP server
    ///
    /// The HTTP server endpoint that reports Prometheus metrics collected in the entire service
    pub fn prometheus_http_service() -> Self {
        Service::new(
            "Prometheus metric HTTP".to_string(),
            PrometheusServer::new(),
        )
    }
}


================================================
FILE: pingora-core/src/services/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The service interface
//!
//! A service to the pingora server is just something runs forever until the server is shutting
//! down.
//!
//! Two types of services are particularly useful
//! - services that are listening to some (TCP) endpoints
//! - services that are just running in the background.

use async_trait::async_trait;
use daggy::Walker;
use daggy::{petgraph::visit::Topo, Dag, NodeIndex};
use log::{error, info, warn};
use parking_lot::Mutex;
use std::borrow::Borrow;
use std::sync::Arc;
use std::sync::Weak;
use std::time::Duration;
use tokio::sync::watch;

#[cfg(unix)]
use crate::server::ListenFds;
use crate::server::ShutdownWatch;

pub mod background;
pub mod listening;

/// A notification channel for signaling when a service has become ready.
///
/// Services can use this to notify other services that may depend on them
/// that they have successfully started and are ready to serve requests.
///
/// # Example
///
/// ```rust,ignore
/// use pingora_core::services::ServiceReadyNotifier;
///
/// async fn my_service(ready_notifier: ServiceReadyNotifier) {
///     // Perform initialization...
///
///     // Signal that the service is ready
///     ready_notifier.notify_ready();
///
///     // Continue with main service loop...
/// }
/// ```
pub struct ServiceReadyNotifier {
    sender: watch::Sender<bool>,
}

impl Drop for ServiceReadyNotifier {
    /// In the event that the notifier is dropped before notifying that the
    /// service is ready, we opt to signal ready anyway
    fn drop(&mut self) {
        // Ignore errors - if there are no receivers, that's fine
        let _ = self.sender.send(true);
    }
}

impl ServiceReadyNotifier {
    /// Creates a new ServiceReadyNotifier from a watch sender.
    /// You will not need to create one of these for normal usage, but being
    /// able to is useful for testing.
    pub fn new(sender: watch::Sender<bool>) -> Self {
        Self { sender }
    }

    /// Notifies dependent services that this service is ready.
    ///
    /// Consumes the notifier to ensure ready is only signaled once.
    pub fn notify_ready(self) {
        // Dropping the notifier will signal that the service is ready
        drop(self);
    }
}

/// A receiver for watching when a service becomes ready.
pub type ServiceReadyWatch = watch::Receiver<bool>;

/// A handle to a service in the server.
///
/// This is returned by [`crate::server::Server::add_service()`] and provides
/// methods to declare that other services depend on this one.
///
/// # Example
///
/// ```rust,ignore
/// let db_handle = server.add_service(database_service);
/// let cache_handle = server.add_service(cache_service);
///
/// let api_handle = server.add_service(api_service);
/// api_handle.add_dependency(&db_handle);
/// api_handle.add_dependency(&cache_handle);
/// ```
#[derive(Debug, Clone)]
pub struct ServiceHandle {
    pub(crate) id: NodeIndex,
    name: String,
    ready_watch: ServiceReadyWatch,
    dependencies: Weak<Mutex<DependencyGraph>>,
}

/// Internal representation of a dependency relationship.
#[derive(Debug, Clone)]
pub(crate) struct ServiceDependency {
    pub name: String,
    pub ready_watch: ServiceReadyWatch,
}

impl ServiceHandle {
    /// Creates a new ServiceHandle with the given ID, name, and readiness watcher.
    pub(crate) fn new(
        id: NodeIndex,
        name: String,
        ready_watch: ServiceReadyWatch,
        dependencies: &Arc<Mutex<DependencyGraph>>,
    ) -> Self {
        Self {
            id,
            name,
            ready_watch,
            dependencies: Arc::downgrade(dependencies),
        }
    }

    #[cfg(test)]
    fn get_dependencies(&self) -> Vec<ServiceDependency> {
        let Some(deps_lock) = self.dependencies.upgrade() else {
            return Vec::new();
        };

        let deps = deps_lock.lock();
        deps.get_dependencies(self.id)
    }

    /// Returns the name of the service.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Returns a clone of the readiness watcher for this service.
    #[allow(dead_code)]
    pub(crate) fn ready_watch(&self) -> ServiceReadyWatch {
        self.ready_watch.clone()
    }

    /// Declares that this service depends on another service.
    ///
    /// This service will not start until the specified dependency has started
    /// and signaled readiness.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let db_id = server.add_service(database_service);
    /// let api_id = server.add_service(api_service);
    ///
    /// // API service depends on database
    /// api_id.add_dependency(&db_id);
    /// ```
    pub fn add_dependency(&self, dependency: impl Borrow<ServiceHandle>) {
        let Some(deps_lock) = self.dependencies.upgrade() else {
            warn!("Attempted to add a dependency after the dependency tree was dropped");
            return;
        };

        let mut deps = deps_lock.lock();
        if let Err(e) = deps.add_dependency(self.id, dependency.borrow().id) {
            error!("Error creating dependency edge: {e}");
        }
    }

    /// Declares that this service depends on the given other services.
    ///
    /// This service will not start until the specified dependencies have
    /// started and signaled readiness.
    ///
    /// # Example
    ///
    /// ```rust,ignore
    /// let db_id = server.add_service(database_service);
    /// let cache_id = server.add_service(cache_service);
    /// let api_id = server.add_service(api_service);
    ///
    /// // API service depends on database
    /// api_id.add_dependencies(&[&db_id, &cache_id]);
    /// ```
    pub fn add_dependencies<'a, D>(&self, dependencies: impl IntoIterator<Item = D>)
    where
        D: Borrow<ServiceHandle> + 'a,
    {
        for dependency in dependencies {
            self.add_dependency(dependency);
        }
    }
}

/// Helper for validating service dependency graphs using daggy.
pub(crate) struct DependencyGraph {
    /// The directed acyclic graph structure from daggy.
    dag: Dag<ServiceDependency, ()>,
}

impl DependencyGraph {
    /// Creates a new dependency graph.
    pub(crate) fn new() -> Self {
        Self { dag: Dag::new() }
    }

    /// Adds a service node to the graph.
    ///
    /// This should be called for all services first, before adding edges.
    pub(crate) fn add_node(&mut self, name: String, ready_watch: ServiceReadyWatch) -> NodeIndex {
        self.dag.add_node(ServiceDependency { name, ready_watch })
    }
    /// Adds a dependency edge from one service to another.
    ///
    /// Returns an error if adding this dependency would create a cycle or reference
    /// a non-existent service.
    pub(crate) fn add_dependency(
        &mut self,
        dependent_service_node_idx: NodeIndex,
        dependency_service_node_idx: NodeIndex,
    ) -> Result<(), String> {
        // Try to add edge (from dependency to dependent)
        // daggy will return an error if this would create a cycle
        if let Err(cycle) =
            self.dag
                .add_edge(dependency_service_node_idx, dependent_service_node_idx, ())
        {
            return Err(format!(
                "Circular service dependency detected between {} and {} creating cycle: {cycle}",
                self.dag[dependency_service_node_idx].name,
                self.dag[dependent_service_node_idx].name
            ));
        }

        Ok(())
    }

    /// Returns services in topological order (dependencies before dependents).
    ///
    /// This ordering ensures that services are started in the correct order.
    /// Returns service IDs in the correct startup order.
    pub(crate) fn topological_sort(&self) -> Result<Vec<(NodeIndex, ServiceDependency)>, String> {
        // Use daggy's built-in topological walker
        let mut sorted = Vec::new();
        let mut topo = Topo::new(&self.dag);

        while let Some(service_id) = topo.next(&self.dag) {
            sorted.push((service_id, self.dag[service_id].clone()));
        }

        Ok(sorted)
    }

    pub(crate) fn get_dependencies(&self, service_id: NodeIndex) -> Vec<ServiceDependency> {
        self.dag
            .parents(service_id)
            .iter(&self.dag)
            .map(|(_, n)| self.dag[n].clone())
            .collect()
    }
}

impl Default for DependencyGraph {
    fn default() -> Self {
        Self::new()
    }
}

#[async_trait]
pub trait ServiceWithDependents: Send + Sync {
    /// This function will be called when the server is ready to start the service.
    ///
    /// Override this method if you need to control exactly when the service signals readiness
    /// (e.g., after async initialization is complete).
    ///
    /// # Arguments
    ///
    /// - `fds` (Unix only): a collection of listening file descriptors. During zero downtime restart
    ///   the `fds` would contain the listening sockets passed from the old service, services should
    ///   take the sockets they need to use then. If the sockets the service looks for don't appear in
    ///   the collection, the service should create its own listening sockets and then put them into
    ///   the collection in order for them to be passed to the next server.
    /// - `shutdown`: the shutdown signal this server would receive.
    /// - `listeners_per_fd`: number of listener tasks to spawn per file descriptor.
    /// - `ready_notifier`: notifier to signal when the service is ready. Services with
    ///   dependents should call `ready_notifier.notify_ready()` once they are fully initialized.
    async fn start_service(
        &mut self,
        #[cfg(unix)] fds: Option<ListenFds>,
        shutdown: ShutdownWatch,
        listeners_per_fd: usize,
        ready_notifier: ServiceReadyNotifier,
    );

    /// The name of the service, just for logging and naming the threads assigned to this service
    ///
    /// Note that due to the limit of the underlying system, only the first 16 chars will be used
    fn name(&self) -> &str;

    /// The preferred number of threads to run this service
    ///
    /// If `None`, the global setting will be used
    fn threads(&self) -> Option<usize> {
        None
    }

    /// This is currently called to inform the service about the delay it
    /// experienced from between waiting on its dependencies. Default behavior
    /// is to log the time.
    ///
    /// TODO. It would be nice if this function was called intermittently by
    /// the server while the service was waiting to give live updates while the
    /// service was waiting and allow the service to decide whether to keep
    /// waiting, continue anyway, or exit
    fn on_startup_delay(&self, time_waited: Duration) {
        info!(
            "Service {} spent {}ms waiting on dependencies",
            self.name(),
            time_waited.as_millis()
        );
    }
}

#[async_trait]
impl<S> ServiceWithDependents for S
where
    S: Service,
{
    async fn start_service(
        &mut self,
        #[cfg(unix)] fds: Option<ListenFds>,
        shutdown: ShutdownWatch,
        listeners_per_fd: usize,
        ready_notifier: ServiceReadyNotifier,
    ) {
        // Signal ready immediately
        ready_notifier.notify_ready();

        S::start_service(
            self,
            #[cfg(unix)]
            fds,
            shutdown,
            listeners_per_fd,
        )
        .await
    }

    fn name(&self) -> &str {
        S::name(self)
    }

    fn threads(&self) -> Option<usize> {
        S::threads(self)
    }

    fn on_startup_delay(&self, time_waited: Duration) {
        S::on_startup_delay(self, time_waited)
    }
}

/// The service interface
#[async_trait]
pub trait Service: Sync + Send {
    /// Start the service without readiness notification.
    ///
    /// This is a simpler version of [`Self::start_service()`] for services that don't need
    /// to control when they signal readiness. The default implementation does nothing.
    ///
    /// Most services should override this method instead of [`Self::start_service()`].
    ///
    /// # Arguments
    ///
    /// - `fds` (Unix only): a collection of listening file descriptors.
    /// - `shutdown`: the shutdown signal this server would receive.
    /// - `listeners_per_fd`: number of listener tasks to spawn per file descriptor.
    async fn start_service(
        &mut self,
        #[cfg(unix)] _fds: Option<ListenFds>,
        _shutdown: ShutdownWatch,
        _listeners_per_fd: usize,
    ) {
        // Default: do nothing
    }

    /// The name of the service, just for logging and naming the threads assigned to this service
    ///
    /// Note that due to the limit of the underlying system, only the first 16 chars will be used
    fn name(&self) -> &str;

    /// The preferred number of threads to run this service
    ///
    /// If `None`, the global setting will be used
    fn threads(&self) -> Option<usize> {
        None
    }

    /// This is currently called to inform the service about the delay it
    /// experienced from between waiting on its dependencies. Default behavior
    /// is to log the time.
    ///
    /// TODO. It would be nice if this function was called intermittently by
    /// the server while the service was waiting to give live updates while the
    /// service was waiting and allow the service to decide whether to keep
    /// waiting, continue anyway, or exit
    fn on_startup_delay(&self, time_waited: Duration) {
        info!(
            "Service {} spent {}ms waiting on dependencies",
            self.name(),
            time_waited.as_millis()
        );
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_service_handle_creation() {
        let deps: Arc<Mutex<DependencyGraph>> = Arc::new(Mutex::new(DependencyGraph::new()));
        let (tx, rx) = watch::channel(false);
        let service_id = ServiceHandle::new(0.into(), "test_service".to_string(), rx, &deps);

        assert_eq!(service_id.id, 0.into());
        assert_eq!(service_id.name(), "test_service");

        // Should be able to clone the watch
        let watch_clone = service_id.ready_watch();
        assert!(!*watch_clone.borrow());

        // Signaling ready should be observable through cloned watch
        tx.send(true).ok();
        assert!(*watch_clone.borrow());
    }

    #[test]
    fn test_service_handle_add_dependency() {
        let graph: Arc<Mutex<DependencyGraph>> = Arc::new(Mutex::new(DependencyGraph::new()));
        let (tx1, rx1) = watch::channel(false);
        let (tx1_clone, rx1_clone) = (tx1.clone(), rx1.clone());
        let (_tx2, rx2) = watch::channel(false);
        let (_tx2_clone, rx2_clone) = (_tx2.clone(), rx2.clone());

        // Add nodes to the graph first
        let dep_node = {
            let mut g = graph.lock();
            g.add_node("dependency".to_string(), rx1)
        };
        let main_node = {
            let mut g = graph.lock();
            g.add_node("main".to_string(), rx2)
        };

        let dep_service = ServiceHandle::new(dep_node, "dependency".to_string(), rx1_clone, &graph);
        let main_service = ServiceHandle::new(main_node, "main".to_string(), rx2_clone, &graph);

        // Add dependency
        main_service.add_dependency(&dep_service);

        // Get dependencies and verify
        let deps = main_service.get_dependencies();
        assert_eq!(deps.len(), 1);
        assert_eq!(deps[0].name, "dependency");

        // Verify watch is working
        assert!(!*deps[0].ready_watch.borrow());
        tx1_clone.send(true).ok();
        assert!(*deps[0].ready_watch.borrow());
    }

    #[test]
    fn test_service_handle_multiple_dependencies() {
        let graph: Arc<Mutex<DependencyGraph>> = Arc::new(Mutex::new(DependencyGraph::new()));
        let (_tx1, rx1) = watch::channel(false);
        let rx1_clone = rx1.clone();
        let (_tx2, rx2) = watch::channel(false);
        let rx2_clone = rx2.clone();
        let (_tx3, rx3) = watch::channel(false);
        let rx3_clone = rx3.clone();

        // Add nodes to the graph first
        let dep1_node = {
            let mut g = graph.lock();
            g.add_node("dep1".to_string(), rx1)
        };
        let dep2_node = {
            let mut g = graph.lock();
            g.add_node("dep2".to_string(), rx2)
        };
        let main_node = {
            let mut g = graph.lock();
            g.add_node("main".to_string(), rx3)
        };

        let dep1 = ServiceHandle::new(dep1_node, "dep1".to_string(), rx1_clone, &graph);
        let dep2 = ServiceHandle::new(dep2_node, "dep2".to_string(), rx2_clone, &graph);
        let main_service = ServiceHandle::new(main_node, "main".to_string(), rx3_clone, &graph);

        // Add multiple dependencies
        main_service.add_dependency(&dep1);
        main_service.add_dependency(&dep2);

        // Get dependencies and verify
        let deps = main_service.get_dependencies();
        assert_eq!(deps.len(), 2);

        let dep_names: Vec<&str> = deps.iter().map(|d| d.name.as_str()).collect();
        assert!(dep_names.contains(&"dep1"));
        assert!(dep_names.contains(&"dep2"));
    }

    #[test]
    fn test_single_service_no_dependencies() {
        let mut graph = DependencyGraph::new();
        let (_tx, rx) = watch::channel(false);
        let _node = graph.add_node("service1".to_string(), rx);

        let order = graph.topological_sort().unwrap();
        assert_eq!(order.len(), 1);
        assert_eq!(order[0].1.name, "service1");
    }

    #[test]
    fn test_simple_dependency_chain() {
        let mut graph = DependencyGraph::new();
        let (_tx1, rx1) = watch::channel(false);
        let (_tx2, rx2) = watch::channel(false);
        let (_tx3, rx3) = watch::channel(false);

        let node1 = graph.add_node("service1".to_string(), rx1);
        let node2 = graph.add_node("service2".to_string(), rx2);
        let node3 = graph.add_node("service3".to_string(), rx3);

        // service2 depends on service1, service3 depends on service2
        graph.add_dependency(node2, node1).unwrap();
        graph.add_dependency(node3, node2).unwrap();

        let order = graph.topological_sort().unwrap();
        assert_eq!(order.len(), 3);
        // Verify order: service1, service2, service3
        assert_eq!(order[0].1.name, "service1");
        assert_eq!(order[1].1.name, "service2");
        assert_eq!(order[2].1.name, "service3");
    }

    #[test]
    fn test_diamond_dependency() {
        let mut graph = DependencyGraph::new();
        let (_tx1, rx1) = watch::channel(false);
        let (_tx2, rx2) = watch::channel(false);
        let (_tx3, rx3) = watch::channel(false);

        let db = graph.add_node("db".to_string(), rx1);
        let cache = graph.add_node("cache".to_string(), rx2);
        let api = graph.add_node("api".to_string(), rx3);

        // api depends on both db and cache
        graph.add_dependency(api, db).unwrap();
        graph.add_dependency(api, cache).unwrap();

        let order = graph.topological_sort().unwrap();
        // api should come last, but db and cache order doesn't matter
        assert_eq!(order.len(), 3);
        assert_eq!(order[2].1.name, "api");
        let first_two: Vec<&str> = order[0..2].iter().map(|(_, d)| d.name.as_str()).collect();
        assert!(first_two.contains(&"db"));
        assert!(first_two.contains(&"cache"));
    }

    #[test]
    #[should_panic(expected = "node indices out of bounds")]
    fn test_missing_dependency() {
        let mut graph = DependencyGraph::new();
        let (_tx1, rx1) = watch::channel(false);

        let node1 = graph.add_node("service1".to_string(), rx1);
        let nonexistent = NodeIndex::new(999);

        // Try to add dependency on non-existent node - this should panic
        let _ = graph.add_dependency(node1, nonexistent);
    }

    #[test]
    fn test_circular_dependency_self() {
        let mut graph = DependencyGraph::new();
        let (_tx1, rx1) = watch::channel(false);

        let node1 = graph.add_node("service1".to_string(), rx1);

        // Try to make service depend on itself
        let result = graph.add_dependency(node1, node1);

        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Circular"));
    }

    #[test]
    fn test_circular_dependency_two_services() {
        let mut graph = DependencyGraph::new();
        let (_tx1, rx1) = watch::channel(false);
        let (_tx2, rx2) = watch::channel(false);

        // Add both nodes first
        let node1 = graph.add_node("service1".to_string(), rx1);
        let node2 = graph.add_node("service2".to_string(), rx2);

        // Try to add circular dependencies
        graph.add_dependency(node1, node2).unwrap();
        let result = graph.add_dependency(node2, node1);

        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Circular"));
    }

    #[test]
    fn test_circular_dependency_three_services() {
        let mut graph = DependencyGraph::new();
        let (_tx1, rx1) = watch::channel(false);
        let (_tx2, rx2) = watch::channel(false);
        let (_tx3, rx3) = watch::channel(false);

        // Add all nodes first
        let node1 = graph.add_node("service1".to_string(), rx1);
        let node2 = graph.add_node("service2".to_string(), rx2);
        let node3 = graph.add_node("service3".to_string(), rx3);

        // Add dependencies that would form a cycle
        graph.add_dependency(node1, node2).unwrap();
        graph.add_dependency(node2, node3).unwrap();
        let result = graph.add_dependency(node3, node1);

        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Circular"));
    }

    #[test]
    fn test_complex_valid_graph() {
        let mut graph = DependencyGraph::new();
        let (_tx1, rx1) = watch::channel(false);
        let (_tx2, rx2) = watch::channel(false);
        let (_tx3, rx3) = watch::channel(false);
        let (_tx4, rx4) = watch::channel(false);
        let (_tx5, rx5) = watch::channel(false);

        // Build a complex dependency graph:
        //   db, cache - no deps
        //   auth -> db
        //   api -> db, cache, auth
        //   frontend -> api
        let db = graph.add_node("db".to_string(), rx1);
        let cache = graph.add_node("cache".to_string(), rx2);
        let auth = graph.add_node("auth".to_string(), rx3);
        let api = graph.add_node("api".to_string(), rx4);
        let frontend = graph.add_node("frontend".to_string(), rx5);

        graph.add_dependency(auth, db).unwrap();
        graph.add_dependency(api, db).unwrap();
        graph.add_dependency(api, cache).unwrap();
        graph.add_dependency(api, auth).unwrap();
        graph.add_dependency(frontend, api).unwrap();

        let order = graph.topological_sort().unwrap();

        // Verify ordering constraints using names
        let db_pos = order.iter().position(|(_, d)| d.name == "db").unwrap();
        let cache_pos = order.iter().position(|(_, d)| d.name == "cache").unwrap();
        let auth_pos = order.iter().position(|(_, d)| d.name == "auth").unwrap();
        let api_pos = order.iter().position(|(_, d)| d.name == "api").unwrap();
        let frontend_pos = order
            .iter()
            .position(|(_, d)| d.name == "frontend")
            .unwrap();

        assert!(db_pos < auth_pos);
        assert!(auth_pos < api_pos);
        assert!(db_pos < api_pos);
        assert!(cache_pos < api_pos);
        assert!(api_pos < frontend_pos);
    }
}


================================================
FILE: pingora-core/src/tls/mod.rs
================================================
// Copyright 2024 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This module contains a dummy TLS implementation for the scenarios where real TLS
//! implementations are unavailable.

macro_rules! impl_display {
    ($ty:ty) => {
        impl std::fmt::Display for $ty {
            fn fmt(&self, _f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
                Ok(())
            }
        }
    };
}

macro_rules! impl_deref {
    ($from:ty => $to:ty) => {
        impl std::ops::Deref for $from {
            type Target = $to;
            fn deref(&self) -> &$to {
                panic!("Not implemented");
            }
        }
        impl std::ops::DerefMut for $from {
            fn deref_mut(&mut self) -> &mut $to {
                panic!("Not implemented");
            }
        }
    };
}

pub mod ssl {
    use super::error::ErrorStack;
    use super::x509::verify::X509VerifyParamRef;
    use super::x509::{X509VerifyResult, X509};

    /// An error returned from an ALPN selection callback.
    pub struct AlpnError;
    impl AlpnError {
        /// Terminate the handshake with a fatal alert.
        pub const ALERT_FATAL: AlpnError = Self {};

        /// Do not select a protocol, but continue the handshake.
        pub const NOACK: AlpnError = Self {};
    }

    /// A type which allows for configuration of a client-side TLS session before connection.
    pub struct ConnectConfiguration;
    impl_deref! {ConnectConfiguration => SslRef}
    impl ConnectConfiguration {
        /// Configures the use of Server Name Indication (SNI) when connecting.
        pub fn set_use_server_name_indication(&mut self, _use_sni: bool) {
            panic!("Not implemented");
        }

        /// Configures the use of hostname verification when connecting.
        pub fn set_verify_hostname(&mut self, _verify_hostname: bool) {
            panic!("Not implemented");
        }

        /// Returns an `Ssl` configured to connect to the provided domain.
        pub fn into_ssl(self, _domain: &str) -> Result<Ssl, ErrorStack> {
            panic!("Not implemented");
        }

        /// Like `SslContextBuilder::set_verify`.
        pub fn set_verify(&mut self, _mode: SslVerifyMode) {
            panic!("Not implemented");
        }

        /// Like `SslContextBuilder::set_alpn_protos`.
        pub fn set_alpn_protos(&mut self, _protocols: &[u8]) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Returns a mutable reference to the X509 verification configuration.
        pub fn param_mut(&mut self) -> &mut X509VerifyParamRef {
            panic!("Not implemented");
        }
    }

    /// An SSL error.
    #[derive(Debug)]
    pub struct Error;
    impl_display!(Error);
    impl Error {
        pub fn code(&self) -> ErrorCode {
            panic!("Not implemented");
        }
    }

    /// An error code returned from SSL functions.
    #[derive(PartialEq)]
    pub struct ErrorCode(i32);
    impl ErrorCode {
        /// An error occurred in the SSL library.
        pub const SSL: ErrorCode = Self(0);
    }

    /// An identifier of a session name type.
    pub struct NameType;
    impl NameType {
        pub const HOST_NAME: NameType = Self {};
    }

    /// The state of an SSL/TLS session.
    pub struct Ssl;
    impl Ssl {
        /// Creates a new `Ssl`.
        pub fn new(_ctx: &SslContextRef) -> Result<Ssl, ErrorStack> {
            panic!("Not implemented");
        }
    }
    impl_deref! {Ssl => SslRef}

    /// A type which wraps server-side streams in a TLS session.
    pub struct SslAcceptor;
    impl SslAcceptor {
        /// Creates a new builder configured to connect to non-legacy clients. This should
        /// generally be considered a reasonable default choice.
        pub fn mozilla_intermediate_v5(
            _method: SslMethod,
        ) -> Result<SslAcceptorBuilder, ErrorStack> {
            panic!("Not implemented");
        }
    }

    /// A builder for `SslAcceptor`s.
    pub struct SslAcceptorBuilder;
    impl SslAcceptorBuilder {
        /// Consumes the builder, returning a `SslAcceptor`.
        pub fn build(self) -> SslAcceptor {
            panic!("Not implemented");
        }

        /// Sets the callback used by a server to select a protocol for Application Layer Protocol
        /// Negotiation (ALPN).
        pub fn set_alpn_select_callback<F>(&mut self, _callback: F)
        where
            F: for<'a> Fn(&mut SslRef, &'a [u8]) -> Result<&'a [u8], AlpnError>
                + 'static
                + Sync
                + Send,
        {
            panic!("Not implemented");
        }

        /// Loads a certificate chain from a file.
        pub fn set_certificate_chain_file<P: AsRef<std::path::Path>>(
            &mut self,
            _file: P,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Loads the private key from a file.
        pub fn set_private_key_file<P: AsRef<std::path::Path>>(
            &mut self,
            _file: P,
            _file_type: SslFiletype,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Sets the maximum supported protocol version.
        pub fn set_max_proto_version(
            &mut self,
            _version: Option<SslVersion>,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }
    }

    /// Reference to an [`SslCipher`].
    pub struct SslCipherRef;
    impl SslCipherRef {
        /// Returns the name of the cipher.
        pub fn name(&self) -> &'static str {
            panic!("Not implemented");
        }
    }

    /// A type which wraps client-side streams in a TLS session.
    pub struct SslConnector;
    impl SslConnector {
        /// Creates a new builder for TLS connections.
        pub fn builder(_method: SslMethod) -> Result<SslConnectorBuilder, ErrorStack> {
            panic!("Not implemented");
        }

        /// Returns a structure allowing for configuration of a single TLS session before connection.
        pub fn configure(&self) -> Result<ConnectConfiguration, ErrorStack> {
            panic!("Not implemented");
        }

        /// Returns a shared reference to the inner raw `SslContext`.
        pub fn context(&self) -> &SslContextRef {
            panic!("Not implemented");
        }
    }

    /// A builder for `SslConnector`s.
    pub struct SslConnectorBuilder;
    impl SslConnectorBuilder {
        /// Consumes the builder, returning an `SslConnector`.
        pub fn build(self) -> SslConnector {
            panic!("Not implemented");
        }

        /// Sets the list of supported ciphers for protocols before TLSv1.3.
        pub fn set_cipher_list(&mut self, _cipher_list: &str) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Sets the context’s supported signature algorithms.
        pub fn set_sigalgs_list(&mut self, _sigalgs: &str) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Sets the minimum supported protocol version.
        pub fn set_min_proto_version(
            &mut self,
            _version: Option<SslVersion>,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Sets the maximum supported protocol version.
        pub fn set_max_proto_version(
            &mut self,
            _version: Option<SslVersion>,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Use the default locations of trusted certificates for verification.
        pub fn set_default_verify_paths(&mut self) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Loads trusted root certificates from a file.
        pub fn set_ca_file<P: AsRef<std::path::Path>>(
            &mut self,
            _file: P,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Loads a leaf certificate from a file.
        pub fn set_certificate_file<P: AsRef<std::path::Path>>(
            &mut self,
            _file: P,
            _file_type: SslFiletype,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Loads the private key from a file.
        pub fn set_private_key_file<P: AsRef<std::path::Path>>(
            &mut self,
            _file: P,
            _file_type: SslFiletype,
        ) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Sets the TLS key logging callback.
        pub fn set_keylog_callback<F>(&mut self, _callback: F)
        where
            F: Fn(&SslRef, &str) + 'static + Sync + Send,
        {
            panic!("Not implemented");
        }
    }

    /// A context object for TLS streams.
    pub struct SslContext;
    impl SslContext {
        /// Creates a new builder object for an `SslContext`.
        pub fn builder(_method: SslMethod) -> Result<SslContextBuilder, ErrorStack> {
            panic!("Not implemented");
        }
    }
    impl_deref! {SslContext => SslContextRef}

    /// A builder for `SslContext`s.
    pub struct SslContextBuilder;
    impl SslContextBuilder {
        /// Consumes the builder, returning a new `SslContext`.
        pub fn build(self) -> SslContext {
            panic!("Not implemented");
        }
    }

    /// Reference to [`SslContext`]
    pub struct SslContextRef;

    /// An identifier of the format of a certificate or key file.
    pub struct SslFiletype;
    impl SslFiletype {
        /// The PEM format.
        pub const PEM: SslFiletype = Self {};
    }

    /// A type specifying the kind of protocol an `SslContext`` will speak.
    pub struct SslMethod;
    impl SslMethod {
        /// Support all versions of the TLS protocol.
        pub fn tls() -> SslMethod {
            panic!("Not implemented");
        }
    }

    /// Reference to an [`Ssl`].
    pub struct SslRef;
    impl SslRef {
        /// Like [`SslContextBuilder::set_verify`].
        pub fn set_verify(&mut self, _mode: SslVerifyMode) {
            panic!("Not implemented");
        }

        /// Returns the current cipher if the session is active.
        pub fn current_cipher(&self) -> Option<&SslCipherRef> {
            panic!("Not implemented");
        }

        /// Sets the host name to be sent to the server for Server Name Indication (SNI).
        pub fn set_hostname(&mut self, _hostname: &str) -> Result<(), ErrorStack> {
            panic!("Not implemented");
        }

        /// Returns the peer’s certificate, if present.
        pub fn peer_certificate(&self) -> Option<X509> {
            panic!("Not implemented");
        }

        /// Returns the certificate verification result.
        pub fn verify_result(&self) -> X509VerifyResult {
            panic!("Not implemented");
        }

        /// Returns a string describing the protocol version of the session.
        pub fn version_str(&self) -> &'static str {
            panic!("Not implemented");
        }

        /// Returns the protocol selected via Application Layer Protocol Negotiation (ALPN).
        pub fn selected_alpn_protocol(&self) -> Option<&[u8]> {
            panic!("Not implemented");
        }

        /// Returns the servername sent by the client via Server Name Indication (SNI).
        pub fn servername(&self, _type_: NameType) -> Option<&str> {
            panic!("Not implemented");
        }
    }

    /// Options controlling the behavior of certificate verification.
    pub struct SslVerifyMode;
    impl SslVerifyMode {
        /// Verifies that the peer’s certificate is trusted.
        pub const PEER: Self = Self {};

        /// Disables verification of the peer’s certificate.
        pub const NONE: Self = Self {};
    }

    /// An SSL/TLS protocol version.
    pub struct SslVersion;
    impl SslVersion {
        /// TLSv1.0
        pub const TLS1: SslVersion = Self {};

        /// TLSv1.2
        pub const TLS1_2: SslVersion = Self {};

        /// TLSv1.3
        pub const TLS1_3: SslVersion = Self {};
    }

    /// A standard implementation of protocol selection for Application Layer Protocol Negotiation
    /// (ALPN).
    pub fn select_next_proto<'a>(_server: &[u8], _client: &'a [u8]) -> Option<&'a [u8]> {
        panic!("Not implemented");
    }
}

pub mod ssl_sys {
    pub const X509_V_OK: i32 = 0;
    pub const X509_V_ERR_INVALID_CALL: i32 = 69;
}

pub mod error {
    use super::ssl::Error;

    /// Collection of [`Errors`] from OpenSSL.
    #[derive(Debug)]
    pub struct ErrorStack;
    impl_display!(ErrorStack);
    impl std::error::Error for ErrorStack {}
    impl ErrorStack {
        /// Returns the contents of the OpenSSL error stack.
        pub fn get() -> ErrorStack {
            panic!("Not implemented");
        }

        /// Returns the errors in the stack.
        pub fn errors(&self) -> &[Error] {
            panic!("Not implemented");
        }
    }
}

pub mod x509 {
    use super::asn1::{Asn1IntegerRef, Asn1StringRef, Asn1TimeRef};
    use super::error::ErrorStack;
    use super::hash::{DigestBytes, MessageDigest};
    use super::nid::Nid;

    /// An `X509` public key certificate.
    #[derive(Debug, Clone)]
    pub struct X509;
    impl_deref! {X509 => X509Ref}
    impl X509 {
        /// Deserializes a PEM-encoded X509 structure.
        pub fn from_pem(_pem: &[u8]) -> Result<X509, ErrorStack> {
            panic!("Not implemented");
        }
    }

    /// A type to destructure and examine an `X509Name`.
    pub struct X509NameEntries<'a> {
        marker: std::marker::PhantomData<&'a ()>,
    }
    impl<'a> Iterator for X509NameEntries<'a> {
        type Item = &'a X509NameEntryRef;
        fn next(&mut self) -> Option<&'a X509NameEntryRef> {
            panic!("Not implemented");
        }
    }

    /// Reference to `X509NameEntry`.
    pub struct X509NameEntryRef;
    impl X509NameEntryRef {
        pub fn data(&self) -> &Asn1StringRef {
            panic!("Not implemented");
        }
    }

    /// Reference to `X509Name`.
    pub struct X509NameRef;
    impl X509NameRef {
        /// Returns the name entries by the nid.
        pub fn entries_by_nid(&self, _nid: Nid) -> X509NameEntries<'_> {
            panic!("Not implemented");
        }
    }

    /// Reference to `X509`.
    pub struct X509Ref;
    impl X509Ref {
        /// Returns this certificate’s subject name.
        pub fn subject_name(&self) -> &X509NameRef {
            panic!("Not implemented");
        }

        /// Returns a digest of the DER representation of the certificate.
        pub fn digest(&self, _hash_type: MessageDigest) -> Result<DigestBytes, ErrorStack> {
            panic!("Not implemented");
        }

        /// Returns the certificate’s Not After validity period.
        pub fn not_after(&self) -> &Asn1TimeRef {
            panic!("Not implemented");
        }

        /// Returns this certificate’s serial number.
        pub fn serial_number(&self) -> &Asn1IntegerRef {
            panic!("Not implemented");
        }
    }

    /// The result of peer certificate verification.
    pub struct X509VerifyResult;
    impl X509VerifyResult {
        /// Return the integer representation of an `X509VerifyResult`.
        pub fn as_raw(&self) -> i32 {
            panic!("Not implemented");
        }
    }

    pub mod store {
        use super::super::error::ErrorStack;
        use super::X509;

        /// A builder type used to construct an `X509Store`.
        pub struct X509StoreBuilder;
        impl X509StoreBuilder {
            /// Returns a builder for a certificate store..
            pub fn new() -> Result<X509StoreBuilder, ErrorStack> {
                panic!("Not implemented");
            }

            /// Constructs the `X509Store`.
            pub fn build(self) -> X509Store {
                panic!("Not implemented");
            }

            /// Adds a certificate to the certificate store.
            pub fn add_cert(&mut self, _cert: X509) -> Result<(), ErrorStack> {
                panic!("Not implemented");
            }
        }

        /// A certificate store to hold trusted X509 certificates.
        pub struct X509Store;
        impl_deref! {X509Store => X509StoreRef}

        /// Reference to an `X509Store`.
        pub struct X509StoreRef;
    }

    pub mod verify {
        /// Reference to `X509VerifyParam`.
        pub struct X509VerifyParamRef;
    }
}

pub mod nid {
    /// A numerical identifier for an OpenSSL object.
    pub struct Nid;
    impl Nid {
        pub const COMMONNAME: Nid = Self {};
        pub const ORGANIZATIONNAME: Nid = Self {};
        pub const ORGANIZATIONALUNITNAME: Nid = Self {};
    }
}

pub mod pkey {
    use super::error::ErrorStack;

    /// A public or private key.
    #[derive(Clone)]
    pub struct PKey<T> {
        marker: std::marker::PhantomData<T>,
    }
    impl<T> std::ops::Deref for PKey<T> {
        type Target = PKeyRef<T>;
        fn deref(&self) -> &PKeyRef<T> {
            panic!("Not implemented");
        }
    }
    impl<T> std::ops::DerefMut for PKey<T> {
        fn deref_mut(&mut self) -> &mut PKeyRef<T> {
            panic!("Not implemented");
        }
    }
    impl PKey<Private> {
        pub fn private_key_from_pem(_pem: &[u8]) -> Result<PKey<Private>, ErrorStack> {
            panic!("Not implemented");
        }
    }

    /// Reference to `PKey`.
    pub struct PKeyRef<T> {
        marker: std::marker::PhantomData<T>,
    }

    /// A tag type indicating that a key has private components.
    #[derive(Clone)]
    pub enum Private {}
    unsafe impl HasPrivate for Private {}

    /// A trait indicating that a key has private components.
    pub unsafe trait HasPrivate {}
}

pub mod hash {
    /// A message digest algorithm.
    pub struct MessageDigest;
    impl MessageDigest {
        pub fn sha256() -> MessageDigest {
            panic!("Not implemented");
        }
    }

    /// The resulting bytes of a digest.
    pub struct DigestBytes;
    impl AsRef<[u8]> for DigestBytes {
        fn as_ref(&self) -> &[u8] {
            panic!("Not implemented");
        }
    }
}

pub mod asn1 {
    use super::bn::BigNum;
    use super::error::ErrorStack;

    /// A reference to an `Asn1Integer`.
    pub struct Asn1IntegerRef;
    impl Asn1IntegerRef {
        /// Converts the integer to a `BigNum`.
        pub fn to_bn(&self) -> Result<BigNum, ErrorStack> {
            panic!("Not implemented");
        }
    }

    /// A reference to an `Asn1String`.
    pub struct Asn1StringRef;
    impl Asn1StringRef {
        pub fn as_utf8(&self) -> Result<&str, ErrorStack> {
            panic!("Not implemented");
        }
    }

    /// Reference to an `Asn1Time`
    pub struct Asn1TimeRef;
    impl_display! {Asn1TimeRef}
}

pub mod bn {
    use super::error::ErrorStack;

    /// Dynamically sized large number implementation
    pub struct BigNum;
    impl BigNum {
        /// Returns a hexadecimal string representation of `self`.
        pub fn to_hex_str(&self) -> Result<&str, ErrorStack> {
            panic!("Not implemented");
        }
    }
}

pub mod ext {
    use super::error::ErrorStack;
    use super::pkey::{HasPrivate, PKeyRef};
    use super::ssl::{Ssl, SslAcceptor, SslRef};
    use super::x509::store::X509StoreRef;
    use super::x509::verify::X509VerifyParamRef;
    use super::x509::X509Ref;

    /// Add name as an additional reference identifier that can match the peer's certificate
    pub fn add_host(_verify_param: &mut X509VerifyParamRef, _host: &str) -> Result<(), ErrorStack> {
        panic!("Not implemented");
    }

    /// Set the verify cert store of `_ssl`
    pub fn ssl_set_verify_cert_store(
        _ssl: &mut SslRef,
        _cert_store: &X509StoreRef,
    ) -> Result<(), ErrorStack> {
        panic!("Not implemented");
    }

    /// Load the certificate into `_ssl`
    pub fn ssl_use_certificate(_ssl: &mut SslRef, _cert: &X509Ref) -> Result<(), ErrorStack> {
        panic!("Not implemented");
    }

    /// Load the private key into `_ssl`
    pub fn ssl_use_private_key<T>(_ssl: &mut SslRef, _key: &PKeyRef<T>) -> Result<(), ErrorStack>
    where
        T: HasPrivate,
    {
        panic!("Not implemented");
    }

    /// Clear the error stack
    pub fn clear_error_stack() {}

    /// Create a new [Ssl] from &[SslAcceptor]
    pub fn ssl_from_acceptor(_acceptor: &SslAcceptor) -> Result<Ssl, ErrorStack> {
        panic!("Not implemented");
    }

    /// Suspend the TLS handshake when a certificate is needed.
    pub fn suspend_when_need_ssl_cert(_ssl: &mut SslRef) {
        panic!("Not implemented");
    }

    /// Unblock a TLS handshake after the certificate is set.
    pub fn unblock_ssl_cert(_ssl: &mut SslRef) {
        panic!("Not implemented");
    }

    /// Whether the TLS error is SSL_ERROR_WANT_X509_LOOKUP
    pub fn is_suspended_for_cert(_error: &super::ssl::Error) -> bool {
        panic!("Not implemented");
    }

    /// Add the certificate into the cert chain of `_ssl`
    pub fn ssl_add_chain_cert(_ssl: &mut SslRef, _cert: &X509Ref) -> Result<(), ErrorStack> {
        panic!("Not implemented");
    }

    /// Set renegotiation
    pub fn ssl_set_renegotiate_mode_freely(_ssl: &mut SslRef) {}

    /// Set the curves/groups of `_ssl`
    pub fn ssl_set_groups_list(_ssl: &mut SslRef, _groups: &str) -> Result<(), ErrorStack> {
        panic!("Not implemented");
    }

    /// Sets whether a second keyshare to be sent in client hello when PQ is used.
    pub fn ssl_use_second_key_share(_ssl: &mut SslRef, _enabled: bool) {}

    /// Get a mutable SslRef ouf of SslRef, which is a missing functionality even when holding &mut SslStream
    /// # Safety
    pub unsafe fn ssl_mut(_ssl: &SslRef) -> &mut SslRef {
        panic!("Not implemented");
    }
}

pub mod tokio_ssl {
    use std::pin::Pin;
    use std::task::{Context, Poll};
    use tokio::io::{AsyncRead, AsyncWrite, ReadBuf};

    use super::error::ErrorStack;
    use super::ssl::{Error, Ssl, SslRef};

    /// A TLS session over a stream.
    #[derive(Debug)]
    pub struct SslStream<S> {
        marker: std::marker::PhantomData<S>,
    }
    impl<S> SslStream<S> {
        /// Creates a new `SslStream`.
        pub fn new(_ssl: Ssl, _stream: S) -> Result<Self, ErrorStack> {
            panic!("Not implemented");
        }

        /// Initiates a client-side TLS handshake.
        pub async fn connect(self: Pin<&mut Self>) -> Result<(), Error> {
            panic!("Not implemented");
        }

        /// Initiates a server-side TLS handshake.
        pub async fn accept(self: Pin<&mut Self>) -> Result<(), Error> {
            panic!("Not implemented");
        }

        /// Returns a shared reference to the `Ssl` object associated with this stream.
        pub fn ssl(&self) -> &SslRef {
            panic!("Not implemented");
        }

        /// Returns a shared reference to the underlying stream.
        pub fn get_ref(&self) -> &S {
            panic!("Not implemented");
        }

        /// Returns a mutable reference to the underlying stream.
        pub fn get_mut(&mut self) -> &mut S {
            panic!("Not implemented");
        }
    }
    impl<S> AsyncRead for SslStream<S>
    where
        S: AsyncRead + AsyncWrite,
    {
        fn poll_read(
            self: Pin<&mut Self>,
            _ctx: &mut Context<'_>,
            _buf: &mut ReadBuf<'_>,
        ) -> Poll<std::io::Result<()>> {
            panic!("Not implemented");
        }
    }
    impl<S> AsyncWrite for SslStream<S>
    where
        S: AsyncRead + AsyncWrite,
    {
        fn poll_write(
            self: Pin<&mut Self>,
            _ctx: &mut Context<'_>,
            _buf: &[u8],
        ) -> Poll<std::io::Result<usize>> {
            panic!("Not implemented");
        }

        fn poll_flush(self: Pin<&mut Self>, _ctx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
            panic!("Not implemented");
        }

        fn poll_shutdown(
            self: Pin<&mut Self>,
            _ctx: &mut Context<'_>,
        ) -> Poll<std::io::Result<()>> {
            panic!("Not implemented");
        }
    }
}


================================================
FILE: pingora-core/src/upstreams/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The interface to connect to a remote server

pub mod peer;


================================================
FILE: pingora-core/src/upstreams/peer.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Defines where to connect to and how to connect to a remote server

use crate::connectors::{l4::BindTo, L4Connect};
use crate::protocols::l4::socket::SocketAddr;
use crate::protocols::tls::CaType;
#[cfg(feature = "openssl_derived")]
use crate::protocols::tls::HandshakeCompleteHook;
#[cfg(feature = "s2n")]
use crate::protocols::tls::PskType;
#[cfg(unix)]
use crate::protocols::ConnFdReusable;
use crate::protocols::TcpKeepalive;
use crate::utils::tls::{get_organization_unit, CertKey};
use ahash::AHasher;
use derivative::Derivative;
use pingora_error::{
    ErrorType::{InternalError, SocketError},
    OrErr, Result,
};
#[cfg(feature = "s2n")]
use pingora_s2n::S2NPolicy;
use std::collections::BTreeMap;
use std::fmt::{Display, Formatter, Result as FmtResult};
use std::hash::{Hash, Hasher};
use std::net::{IpAddr, SocketAddr as InetSocketAddr, ToSocketAddrs as ToInetSocketAddrs};
#[cfg(unix)]
use std::os::unix::{net::SocketAddr as UnixSocketAddr, prelude::AsRawFd};
#[cfg(windows)]
use std::os::windows::io::AsRawSocket;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
use tokio::net::TcpSocket;

pub use crate::protocols::tls::ALPN;

/// A hook function that may generate user data for [`crate::protocols::raw_connect::ProxyDigest`].
///
/// Takes the request and response headers from the proxy connection establishment, and may produce
/// arbitrary data to be stored in ProxyDigest's user_data field.
///
/// This can be useful when, for example, you want to store some parameter(s) from the request or
/// response headers from when the proxy connection was first established.
pub type ProxyDigestUserDataHook = Arc<
    dyn Fn(
            &http::request::Parts,         // request headers
            &pingora_http::ResponseHeader, // response headers
        ) -> Option<Box<dyn std::any::Any + Send + Sync>>
        + Send
        + Sync
        + 'static,
>;

/// The interface to trace the connection
pub trait Tracing: Send + Sync + std::fmt::Debug {
    /// This method is called when successfully connected to a remote server
    fn on_connected(&self);
    /// This method is called when the connection is disconnected.
    fn on_disconnected(&self);
    /// A way to clone itself
    fn boxed_clone(&self) -> Box<dyn Tracing>;
}

/// An object-safe version of Tracing object that can use Clone
#[derive(Debug)]
pub struct Tracer(pub Box<dyn Tracing>);

impl Clone for Tracer {
    fn clone(&self) -> Self {
        Tracer(self.0.boxed_clone())
    }
}

/// [`Peer`] defines the interface to communicate with the [`crate::connectors`] regarding where to
/// connect to and how to connect to it.
pub trait Peer: Display + Clone {
    /// The remote address to connect to
    fn address(&self) -> &SocketAddr;
    /// If TLS should be used;
    fn tls(&self) -> bool;
    /// The SNI to send, if TLS is used
    fn sni(&self) -> &str;
    /// To decide whether a [`Peer`] can use the connection established by another [`Peer`].
    ///
    /// The connections to two peers are considered reusable to each other if their reuse hashes are
    /// the same
    fn reuse_hash(&self) -> u64;
    /// Get the proxy setting to connect to the remote server
    fn get_proxy(&self) -> Option<&Proxy> {
        None
    }
    /// Get the additional options to connect to the peer.
    ///
    /// See [`PeerOptions`] for more details
    fn get_peer_options(&self) -> Option<&PeerOptions> {
        None
    }
    /// Get the additional options for modification.
    fn get_mut_peer_options(&mut self) -> Option<&mut PeerOptions> {
        None
    }
    /// Whether the TLS handshake should validate the cert of the server.
    fn verify_cert(&self) -> bool {
        match self.get_peer_options() {
            Some(opt) => opt.verify_cert,
            None => false,
        }
    }
    /// Whether the TLS handshake should verify that the server cert matches the SNI.
    fn verify_hostname(&self) -> bool {
        match self.get_peer_options() {
            Some(opt) => opt.verify_hostname,
            None => false,
        }
    }
    /// Whether the system trust store should be loaded and used when verifying certificates
    #[cfg(feature = "s2n")]
    fn use_system_certs(&self) -> bool {
        match self.get_peer_options() {
            Some(opt) => opt.use_system_certs,
            None => false,
        }
    }
    /// The alternative common name to use to verify the server cert.
    ///
    /// If the server cert doesn't match the SNI, this name will be used to
    /// verify the cert.
    fn alternative_cn(&self) -> Option<&String> {
        match self.get_peer_options() {
            Some(opt) => opt.alternative_cn.as_ref(),
            None => None,
        }
    }
    /// Information about the local source address this connection should be bound to.
    fn bind_to(&self) -> Option<&BindTo> {
        match self.get_peer_options() {
            Some(opt) => opt.bind_to.as_ref(),
            None => None,
        }
    }
    /// How long connect() call should be wait before it returns a timeout error.
    fn connection_timeout(&self) -> Option<Duration> {
        match self.get_peer_options() {
            Some(opt) => opt.connection_timeout,
            None => None,
        }
    }
    /// How long the overall connection establishment should take before a timeout error is returned.
    fn total_connection_timeout(&self) -> Option<Duration> {
        match self.get_peer_options() {
            Some(opt) => opt.total_connection_timeout,
            None => None,
        }
    }
    /// If the connection can be reused, how long the connection should wait to be reused before it
    /// shuts down.
    fn idle_timeout(&self) -> Option<Duration> {
        self.get_peer_options().and_then(|o| o.idle_timeout)
    }

    /// Get the ALPN preference.
    fn get_alpn(&self) -> Option<&ALPN> {
        self.get_peer_options().map(|opt| &opt.alpn)
    }

    /// Get the CA cert to use to validate the server cert.
    ///
    /// If not set, the default CAs will be used.
    fn get_ca(&self) -> Option<&Arc<CaType>> {
        match self.get_peer_options() {
            Some(opt) => opt.ca.as_ref(),
            None => None,
        }
    }

    /// Get the client cert and key for mutual TLS if any
    fn get_client_cert_key(&self) -> Option<&Arc<CertKey>> {
        None
    }

    /// Get the PSK (pre-shared key) to use to validate the connection
    ///
    /// If not set, PSK validation will not be used
    #[cfg(feature = "s2n")]
    fn get_psk(&self) -> Option<&Arc<PskType>> {
        match self.get_peer_options() {
            Some(opt) => opt.psk.as_ref(),
            None => None,
        }
    }

    /// Get the Security Policy to use for this connection (S2N only)
    ///
    /// If not set, the default policy "default_tls13" will be used
    /// https://aws.github.io/s2n-tls/usage-guide/ch06-security-policies.html
    #[cfg(feature = "s2n")]
    fn get_s2n_security_policy(&self) -> Option<&S2NPolicy> {
        match self.get_peer_options() {
            Some(opt) => opt.s2n_security_policy.as_ref(),
            None => None,
        }
    }

    /// S2N-TLS will delay a response up to the max blinding delay (default 30)
    /// seconds whenever an error triggered by a peer occurs to mitigate against
    /// timing side channels.
    #[cfg(feature = "s2n")]
    fn get_max_blinding_delay(&self) -> Option<u32> {
        match self.get_peer_options() {
            Some(opt) => opt.max_blinding_delay,
            None => None,
        }
    }

    /// The TCP keepalive setting that should be applied to this connection
    fn tcp_keepalive(&self) -> Option<&TcpKeepalive> {
        self.get_peer_options()
            .and_then(|o| o.tcp_keepalive.as_ref())
    }

    /// The interval H2 pings to send to the server if any
    fn h2_ping_interval(&self) -> Option<Duration> {
        self.get_peer_options().and_then(|o| o.h2_ping_interval)
    }

    /// The size of the TCP receive buffer should be limited to. See SO_RCVBUF for more details.
    fn tcp_recv_buf(&self) -> Option<usize> {
        self.get_peer_options().and_then(|o| o.tcp_recv_buf)
    }

    /// The DSCP value that should be applied to the send side of this connection.
    /// See the [RFC](https://datatracker.ietf.org/doc/html/rfc2474) for more details.
    fn dscp(&self) -> Option<u8> {
        self.get_peer_options().and_then(|o| o.dscp)
    }

    /// Whether to enable TCP fast open.
    fn tcp_fast_open(&self) -> bool {
        self.get_peer_options()
            .map(|o| o.tcp_fast_open)
            .unwrap_or_default()
    }

    #[cfg(unix)]
    fn matches_fd<V: AsRawFd>(&self, fd: V) -> bool {
        self.address().check_fd_match(fd)
    }

    #[cfg(windows)]
    fn matches_sock<V: AsRawSocket>(&self, sock: V) -> bool {
        use crate::protocols::ConnSockReusable;
        self.address().check_sock_match(sock)
    }

    fn get_tracer(&self) -> Option<Tracer> {
        None
    }

    /// Returns a hook that should be run before an upstream TCP connection is connected.
    ///
    /// This hook can be used to set additional socket options.
    fn upstream_tcp_sock_tweak_hook(
        &self,
    ) -> Option<&Arc<dyn Fn(&TcpSocket) -> Result<()> + Send + Sync + 'static>> {
        self.get_peer_options()?
            .upstream_tcp_sock_tweak_hook
            .as_ref()
    }

    /// Returns a [`ProxyDigestUserDataHook`] that may generate user data for
    /// [`crate::protocols::raw_connect::ProxyDigest`] when establishing a new proxy connection.
    fn proxy_digest_user_data_hook(&self) -> Option<&ProxyDigestUserDataHook> {
        self.get_peer_options()?
            .proxy_digest_user_data_hook
            .as_ref()
    }

    /// Returns a hook that should be run on TLS handshake completion.
    ///
    /// Any value returned from the returned hook (other than `None`) will be stored in the
    /// `extension` field of `SslDigest`. This allows you to attach custom application-specific
    /// data to the TLS connection, which will be accessible from the HTTP layer via the
    /// `SslDigest` attached to the session digest.
    ///
    /// Currently only enabled for openssl variants with meaningful `TlsRef`s.
    #[cfg(feature = "openssl_derived")]
    fn upstream_tls_handshake_complete_hook(&self) -> Option<&HandshakeCompleteHook> {
        self.get_peer_options()?
            .upstream_tls_handshake_complete_hook
            .as_ref()
    }
}

/// A simple TCP or TLS peer without many complicated settings.
#[derive(Debug, Clone)]
pub struct BasicPeer {
    pub _address: SocketAddr,
    pub sni: String,
    pub options: PeerOptions,
}

impl BasicPeer {
    /// Create a new [`BasicPeer`].
    pub fn new(address: &str) -> Self {
        let addr = SocketAddr::Inet(address.parse().unwrap()); // TODO: check error
        Self::new_from_sockaddr(addr)
    }

    /// Create a new [`BasicPeer`] with the given path to a Unix domain socket.
    #[cfg(unix)]
    pub fn new_uds<P: AsRef<Path>>(path: P) -> Result<Self> {
        let addr = SocketAddr::Unix(
            UnixSocketAddr::from_pathname(path.as_ref())
                .or_err(InternalError, "while creating BasicPeer")?,
        );
        Ok(Self::new_from_sockaddr(addr))
    }

    fn new_from_sockaddr(sockaddr: SocketAddr) -> Self {
        BasicPeer {
            _address: sockaddr,
            sni: "".to_string(), // TODO: add support for SNI
            options: PeerOptions::new(),
        }
    }
}

impl Display for BasicPeer {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        write!(f, "{:?}", self)
    }
}

impl Peer for BasicPeer {
    fn address(&self) -> &SocketAddr {
        &self._address
    }

    fn tls(&self) -> bool {
        !self.sni.is_empty()
    }

    fn bind_to(&self) -> Option<&BindTo> {
        None
    }

    fn sni(&self) -> &str {
        &self.sni
    }

    // TODO: change connection pool to accept u64 instead of String
    fn reuse_hash(&self) -> u64 {
        let mut hasher = AHasher::default();
        self._address.hash(&mut hasher);
        hasher.finish()
    }

    fn get_peer_options(&self) -> Option<&PeerOptions> {
        Some(&self.options)
    }
}

/// Define whether to connect via http or https
#[derive(Hash, Clone, Debug, PartialEq)]
pub enum Scheme {
    HTTP,
    HTTPS,
}

impl Display for Scheme {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        match self {
            Scheme::HTTP => write!(f, "HTTP"),
            Scheme::HTTPS => write!(f, "HTTPS"),
        }
    }
}

impl Scheme {
    pub fn from_tls_bool(tls: bool) -> Self {
        if tls {
            Self::HTTPS
        } else {
            Self::HTTP
        }
    }
}

/// The preferences to connect to a remote server
///
/// See [`Peer`] for the meaning of the fields
#[non_exhaustive]
#[derive(Clone, Derivative)]
#[derivative(Debug)]
pub struct PeerOptions {
    pub bind_to: Option<BindTo>,
    pub connection_timeout: Option<Duration>,
    pub total_connection_timeout: Option<Duration>,
    pub read_timeout: Option<Duration>,
    pub idle_timeout: Option<Duration>,
    pub write_timeout: Option<Duration>,
    pub verify_cert: bool,
    pub verify_hostname: bool,
    #[cfg(feature = "s2n")]
    pub use_system_certs: bool,
    /* accept the cert if it's CN matches the SNI or this name */
    pub alternative_cn: Option<String>,
    pub alpn: ALPN,
    pub ca: Option<Arc<CaType>>,
    pub tcp_keepalive: Option<TcpKeepalive>,
    pub tcp_recv_buf: Option<usize>,
    pub dscp: Option<u8>,
    pub h2_ping_interval: Option<Duration>,
    #[cfg(feature = "s2n")]
    pub psk: Option<Arc<PskType>>,
    #[cfg(feature = "s2n")]
    pub s2n_security_policy: Option<S2NPolicy>,
    #[cfg(feature = "s2n")]
    pub max_blinding_delay: Option<u32>,
    // how many concurrent h2 stream are allowed in the same connection
    pub max_h2_streams: usize,
    /// Allow invalid Content-Length in HTTP/1 responses (non-RFC compliant).
    ///
    /// When enabled, invalid Content-Length responses are treated as close-delimited responses.
    ///
    /// **Note:** This field is unstable and may be removed or changed in future versions.
    /// It exists primarily for compatibility with legacy servers that send malformed headers.
    pub allow_h1_response_invalid_content_length: bool,
    pub extra_proxy_headers: BTreeMap<String, Vec<u8>>,
    // The list of curve the tls connection should advertise
    // if `None`, the default curves will be used
    pub curves: Option<&'static str>,
    // see ssl_use_second_key_share
    pub second_keyshare: bool,
    // whether to enable TCP fast open
    pub tcp_fast_open: bool,
    // use Arc because Clone is required but not allowed in trait object
    pub tracer: Option<Tracer>,
    // A custom L4 connector to use to establish new L4 connections
    pub custom_l4: Option<Arc<dyn L4Connect + Send + Sync>>,
    #[derivative(Debug = "ignore")]
    pub upstream_tcp_sock_tweak_hook:
        Option<Arc<dyn Fn(&TcpSocket) -> Result<()> + Send + Sync + 'static>>,
    #[derivative(Debug = "ignore")]
    pub proxy_digest_user_data_hook: Option<ProxyDigestUserDataHook>,
    /// Hook that allows returning an optional `SslDigestExtension`.
    /// Any returned value will be saved into the `SslDigest`.
    ///
    /// Currently only enabled for openssl variants with meaningful `TlsRef`s.
    #[cfg(feature = "openssl_derived")]
    #[derivative(Debug = "ignore")]
    pub upstream_tls_handshake_complete_hook: Option<HandshakeCompleteHook>,
}

impl PeerOptions {
    /// Create a new [`PeerOptions`]
    pub fn new() -> Self {
        PeerOptions {
            bind_to: None,
            connection_timeout: None,
            total_connection_timeout: None,
            read_timeout: None,
            idle_timeout: None,
            write_timeout: None,
            verify_cert: true,
            verify_hostname: true,
            #[cfg(feature = "s2n")]
            use_system_certs: true,
            alternative_cn: None,
            alpn: ALPN::H1,
            ca: None,
            tcp_keepalive: None,
            tcp_recv_buf: None,
            dscp: None,
            h2_ping_interval: None,
            #[cfg(feature = "s2n")]
            psk: None,
            #[cfg(feature = "s2n")]
            s2n_security_policy: None,
            #[cfg(feature = "s2n")]
            max_blinding_delay: None,
            max_h2_streams: 1,
            allow_h1_response_invalid_content_length: false,
            extra_proxy_headers: BTreeMap::new(),
            curves: None,
            second_keyshare: true, // default true and noop when not using PQ curves
            tcp_fast_open: false,
            tracer: None,
            custom_l4: None,
            upstream_tcp_sock_tweak_hook: None,
            proxy_digest_user_data_hook: None,
            #[cfg(feature = "openssl_derived")]
            upstream_tls_handshake_complete_hook: None,
        }
    }

    /// Set the ALPN according to the `max` and `min` constrains.
    pub fn set_http_version(&mut self, max: u8, min: u8) {
        self.alpn = ALPN::new(max, min);
    }
}

impl Display for PeerOptions {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        if let Some(b) = self.bind_to.as_ref() {
            write!(f, "bind_to: {:?},", b)?;
        }
        if let Some(t) = self.connection_timeout {
            write!(f, "conn_timeout: {:?},", t)?;
        }
        if let Some(t) = self.total_connection_timeout {
            write!(f, "total_conn_timeout: {:?},", t)?;
        }
        if self.verify_cert {
            write!(f, "verify_cert: true,")?;
        }
        if self.verify_hostname {
            write!(f, "verify_hostname: true,")?;
        }
        #[cfg(feature = "s2n")]
        if self.use_system_certs {
            write!(f, "use_system_certs: true,")?;
        }
        if let Some(cn) = &self.alternative_cn {
            write!(f, "alt_cn: {},", cn)?;
        }
        write!(f, "alpn: {},", self.alpn)?;
        if let Some(cas) = &self.ca {
            for ca in cas.iter() {
                write!(
                    f,
                    "CA: {}, expire: {},",
                    get_organization_unit(ca).unwrap_or_default(),
                    ca.not_after()
                )?;
            }
        }
        #[cfg(feature = "s2n")]
        if let Some(policy) = &self.s2n_security_policy {
            write!(f, "s2n_security_policy: {:?}, ", policy)?;
        }
        #[cfg(feature = "s2n")]
        if let Some(psk_config) = &self.psk {
            for psk in &psk_config.keys {
                write!(
                    f,
                    "psk_identity: {}",
                    String::from_utf8_lossy(psk.identity.as_slice())
                )?;
            }
        }
        if let Some(tcp_keepalive) = &self.tcp_keepalive {
            write!(f, "tcp_keepalive: {},", tcp_keepalive)?;
        }
        if let Some(h2_ping_interval) = self.h2_ping_interval {
            write!(f, "h2_ping_interval: {:?},", h2_ping_interval)?;
        }
        Ok(())
    }
}

/// A peer representing the remote HTTP server to connect to
#[derive(Debug, Clone)]
pub struct HttpPeer {
    pub _address: SocketAddr,
    pub scheme: Scheme,
    pub sni: String,
    pub proxy: Option<Proxy>,
    pub client_cert_key: Option<Arc<CertKey>>,
    /// a custom field to isolate connection reuse. Requests with different group keys
    /// cannot share connections with each other.
    pub group_key: u64,
    pub options: PeerOptions,
}

impl HttpPeer {
    // These methods are pretty ad-hoc
    pub fn is_tls(&self) -> bool {
        match self.scheme {
            Scheme::HTTP => false,
            Scheme::HTTPS => true,
        }
    }

    fn new_from_sockaddr(address: SocketAddr, tls: bool, sni: String) -> Self {
        HttpPeer {
            _address: address,
            scheme: Scheme::from_tls_bool(tls),
            sni,
            proxy: None,
            client_cert_key: None,
            group_key: 0,
            options: PeerOptions::new(),
        }
    }

    /// Create a new [`HttpPeer`] with the given socket address and TLS settings.
    pub fn new<A: ToInetSocketAddrs>(address: A, tls: bool, sni: String) -> Self {
        let mut addrs_iter = address.to_socket_addrs().unwrap(); //TODO: handle error
        let addr = addrs_iter.next().unwrap();
        Self::new_from_sockaddr(SocketAddr::Inet(addr), tls, sni)
    }

    /// Create a new [`HttpPeer`] with the given path to Unix domain socket and TLS settings.
    #[cfg(unix)]
    pub fn new_uds(path: &str, tls: bool, sni: String) -> Result<Self> {
        let addr = SocketAddr::Unix(
            UnixSocketAddr::from_pathname(Path::new(path)).or_err(SocketError, "invalid path")?,
        );
        Ok(Self::new_from_sockaddr(addr, tls, sni))
    }

    /// Create a new [`HttpPeer`] that uses a proxy to connect to the upstream IP and port
    /// combination.
    pub fn new_proxy(
        next_hop: &str,
        ip_addr: IpAddr,
        port: u16,
        tls: bool,
        sni: &str,
        headers: BTreeMap<String, Vec<u8>>,
    ) -> Self {
        HttpPeer {
            _address: SocketAddr::Inet(InetSocketAddr::new(ip_addr, port)),
            scheme: Scheme::from_tls_bool(tls),
            sni: sni.to_string(),
            proxy: Some(Proxy {
                next_hop: PathBuf::from(next_hop).into(),
                host: ip_addr.to_string(),
                port,
                headers,
            }),
            client_cert_key: None,
            group_key: 0,
            options: PeerOptions::new(),
        }
    }

    /// Create a new [`HttpPeer`] with client certificate and key for mutual TLS.
    pub fn new_mtls<A: ToInetSocketAddrs>(
        address: A,
        sni: String,
        client_cert_key: Arc<CertKey>,
    ) -> Self {
        let mut peer = Self::new(address, true, sni);
        peer.client_cert_key = Some(client_cert_key);
        peer
    }

    fn peer_hash(&self) -> u64 {
        let mut hasher = AHasher::default();
        self.hash(&mut hasher);
        hasher.finish()
    }
}

impl Hash for HttpPeer {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self._address.hash(state);
        self.scheme.hash(state);
        self.proxy.hash(state);
        self.sni.hash(state);
        // client cert serial
        self.client_cert_key.hash(state);
        // origin server cert verification
        self.verify_cert().hash(state);
        self.verify_hostname().hash(state);
        self.alternative_cn().hash(state);
        #[cfg(feature = "s2n")]
        self.get_psk().hash(state);
        self.group_key.hash(state);
        // max h2 stream settings
        self.options.max_h2_streams.hash(state);
    }
}

impl Display for HttpPeer {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        write!(f, "addr: {}, scheme: {}", self._address, self.scheme)?;
        if !self.sni.is_empty() {
            write!(f, ", sni: {}", self.sni)?;
        }
        if let Some(p) = self.proxy.as_ref() {
            write!(f, ", proxy: {p}")?;
        }
        if let Some(cert) = &self.client_cert_key {
            write!(f, ", client cert: {}", cert)?;
        }
        Ok(())
    }
}

impl Peer for HttpPeer {
    fn address(&self) -> &SocketAddr {
        &self._address
    }

    fn tls(&self) -> bool {
        self.is_tls()
    }

    fn sni(&self) -> &str {
        &self.sni
    }

    // TODO: change connection pool to accept u64 instead of String
    fn reuse_hash(&self) -> u64 {
        self.peer_hash()
    }

    fn get_peer_options(&self) -> Option<&PeerOptions> {
        Some(&self.options)
    }

    fn get_mut_peer_options(&mut self) -> Option<&mut PeerOptions> {
        Some(&mut self.options)
    }

    fn get_proxy(&self) -> Option<&Proxy> {
        self.proxy.as_ref()
    }

    #[cfg(unix)]
    fn matches_fd<V: AsRawFd>(&self, fd: V) -> bool {
        if let Some(proxy) = self.get_proxy() {
            proxy.next_hop.check_fd_match(fd)
        } else {
            self.address().check_fd_match(fd)
        }
    }

    #[cfg(windows)]
    fn matches_sock<V: AsRawSocket>(&self, sock: V) -> bool {
        use crate::protocols::ConnSockReusable;

        if let Some(proxy) = self.get_proxy() {
            panic!("windows do not support peers with proxy")
        } else {
            self.address().check_sock_match(sock)
        }
    }

    fn get_client_cert_key(&self) -> Option<&Arc<CertKey>> {
        self.client_cert_key.as_ref()
    }

    fn get_tracer(&self) -> Option<Tracer> {
        self.options.tracer.clone()
    }
}

/// The proxy settings to connect to the remote server, CONNECT only for now
#[derive(Debug, Hash, Clone)]
pub struct Proxy {
    pub next_hop: Box<Path>, // for now this will be the path to the UDS
    pub host: String,        // the proxied host. Could be either IP addr or hostname.
    pub port: u16,           // the port to proxy to
    pub headers: BTreeMap<String, Vec<u8>>, // the additional headers to add to CONNECT
}

impl Display for Proxy {
    fn fmt(&self, f: &mut Formatter) -> FmtResult {
        write!(
            f,
            "next_hop: {}, host: {}, port: {}",
            self.next_hop.display(),
            self.host,
            self.port
        )
    }
}


================================================
FILE: pingora-core/src/utils/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This module contains various types that make it easier to work with bytes and X509
//! certificates.

#[cfg(feature = "any_tls")]
pub mod tls;

#[cfg(not(feature = "any_tls"))]
pub use crate::tls::utils as tls;

use bytes::Bytes;

/// A `BufRef` is a reference to a buffer of bytes. It removes the need for self-referential data
/// structures. It is safe to use as long as the underlying buffer does not get mutated.
///
/// # Panics
///
/// This will panic if an index is out of bounds.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct BufRef(pub usize, pub usize);

impl BufRef {
    /// Return a sub-slice of `buf`.
    pub fn get<'a>(&self, buf: &'a [u8]) -> &'a [u8] {
        &buf[self.0..self.1]
    }

    /// Return a slice of `buf`. This operation is O(1) and increases the reference count of `buf`.
    pub fn get_bytes(&self, buf: &Bytes) -> Bytes {
        buf.slice(self.0..self.1)
    }

    /// Return the size of the slice reference.
    pub fn len(&self) -> usize {
        self.1 - self.0
    }

    /// Return true if the length is zero.
    pub fn is_empty(&self) -> bool {
        self.1 == self.0
    }
}

impl BufRef {
    /// Initialize a `BufRef` that can reference a slice beginning at index `start` and has a
    /// length of `len`.
    pub fn new(start: usize, len: usize) -> Self {
        BufRef(start, start + len)
    }
}

/// A `KVRef` contains a key name and value pair, stored as two [BufRef] types.
#[derive(Clone)]
pub struct KVRef {
    name: BufRef,
    value: BufRef,
}

impl KVRef {
    /// Like [BufRef::get] for the name.
    pub fn get_name<'a>(&self, buf: &'a [u8]) -> &'a [u8] {
        self.name.get(buf)
    }

    /// Like [BufRef::get] for the value.
    pub fn get_value<'a>(&self, buf: &'a [u8]) -> &'a [u8] {
        self.value.get(buf)
    }

    /// Like [BufRef::get_bytes] for the name.
    pub fn get_name_bytes(&self, buf: &Bytes) -> Bytes {
        self.name.get_bytes(buf)
    }

    /// Like [BufRef::get_bytes] for the value.
    pub fn get_value_bytes(&self, buf: &Bytes) -> Bytes {
        self.value.get_bytes(buf)
    }

    /// Return a new `KVRef` with name and value start indices and lengths.
    pub fn new(name_s: usize, name_len: usize, value_s: usize, value_len: usize) -> Self {
        KVRef {
            name: BufRef(name_s, name_s + name_len),
            value: BufRef(value_s, value_s + value_len),
        }
    }

    /// Return a reference to the value.
    pub fn value(&self) -> &BufRef {
        &self.value
    }
}

/// A [KVRef] which contains empty sub-slices.
pub const EMPTY_KV_REF: KVRef = KVRef {
    name: BufRef(0, 0),
    value: BufRef(0, 0),
};


================================================
FILE: pingora-core/src/utils/tls/boringssl_openssl.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::tls::{nid::Nid, pkey::PKey, pkey::Private, x509::X509};
use crate::Result;
use pingora_error::{ErrorType::*, OrErr};
use std::hash::{Hash, Hasher};

fn get_subject_name(cert: &X509, name_type: Nid) -> Option<String> {
    cert.subject_name()
        .entries_by_nid(name_type)
        .next()
        .map(|name| {
            name.data()
                .as_utf8()
                .map(|s| s.to_string())
                .unwrap_or_default()
        })
}

/// Return the organization associated with the X509 certificate.
pub fn get_organization(cert: &X509) -> Option<String> {
    get_subject_name(cert, Nid::ORGANIZATIONNAME)
}

/// Return the common name associated with the X509 certificate.
pub fn get_common_name(cert: &X509) -> Option<String> {
    get_subject_name(cert, Nid::COMMONNAME)
}

/// Return the common name associated with the X509 certificate.
pub fn get_organization_unit(cert: &X509) -> Option<String> {
    get_subject_name(cert, Nid::ORGANIZATIONALUNITNAME)
}

/// Return the serial number associated with the X509 certificate as a hexadecimal value.
pub fn get_serial(cert: &X509) -> Result<String> {
    let bn = cert
        .serial_number()
        .to_bn()
        .or_err(InvalidCert, "Invalid serial")?;
    let hex = bn.to_hex_str().or_err(InvalidCert, "Invalid serial")?;

    let hex_str: &str = hex.as_ref();
    Ok(hex_str.to_owned())
}

/// This type contains a list of one or more certificates and an associated private key. The leaf
/// certificate should always be first.
#[derive(Clone)]
pub struct CertKey {
    certificates: Vec<X509>,
    key: PKey<Private>,
}

impl CertKey {
    /// Create a new `CertKey` given a list of certificates and a private key.
    pub fn new(certificates: Vec<X509>, key: PKey<Private>) -> CertKey {
        assert!(
            !certificates.is_empty(),
            "expected a non-empty vector of certificates in CertKey::new"
        );

        CertKey { certificates, key }
    }

    /// Peek at the leaf certificate.
    pub fn leaf(&self) -> &X509 {
        // This is safe due to the assertion above.
        &self.certificates[0]
    }

    /// Return the key.
    pub fn key(&self) -> &PKey<Private> {
        &self.key
    }

    /// Return a slice of intermediate certificates. An empty slice means there are none.
    pub fn intermediates(&self) -> &[X509] {
        if self.certificates.len() <= 1 {
            return &[];
        }
        &self.certificates[1..]
    }

    /// Return the organization from the leaf certificate.
    pub fn organization(&self) -> Option<String> {
        get_organization(self.leaf())
    }

    /// Return the serial from the leaf certificate.
    pub fn serial(&self) -> Result<String> {
        get_serial(self.leaf())
    }
}

impl Hash for CertKey {
    fn hash<H: Hasher>(&self, state: &mut H) {
        for certificate in &self.certificates {
            if let Ok(serial) = get_serial(certificate) {
                serial.hash(state)
            }
        }
    }
}

// hide private key
impl std::fmt::Debug for CertKey {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CertKey")
            .field("X509", &self.leaf())
            .finish()
    }
}

impl std::fmt::Display for CertKey {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let leaf = self.leaf();
        if let Some(cn) = get_common_name(leaf) {
            // Write CN if it exists
            write!(f, "CN: {cn},")?;
        } else if let Some(org_unit) = get_organization_unit(leaf) {
            // CA cert might not have CN, so print its unit name instead
            write!(f, "Org Unit: {org_unit},")?;
        }
        write!(f, ", expire: {}", leaf.not_after())
        // ignore the details of the private key
    }
}


================================================
FILE: pingora-core/src/utils/tls/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(feature = "openssl_derived")]
mod boringssl_openssl;

#[cfg(feature = "openssl_derived")]
pub use boringssl_openssl::*;

#[cfg(feature = "rustls")]
mod rustls;

#[cfg(feature = "rustls")]
pub use rustls::*;

#[cfg(feature = "s2n")]
mod s2n;

#[cfg(feature = "s2n")]
pub use s2n::*;


================================================
FILE: pingora-core/src/utils/tls/rustls.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use ouroboros::self_referencing;
use pingora_error::Result;
use pingora_rustls::CertificateDer;
use std::hash::{Hash, Hasher};
use x509_parser::prelude::{FromDer, X509Certificate};

/// Get the organization and serial number associated with the given certificate
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization_serial(x509cert: &WrappedX509) -> Result<(Option<String>, String)> {
    let serial = get_serial(x509cert)?;
    Ok((get_organization(x509cert), serial))
}

fn get_organization_serial_x509(
    x509cert: &X509Certificate<'_>,
) -> Result<(Option<String>, String)> {
    let serial = x509cert.raw_serial_as_string();
    Ok((get_organization_x509(x509cert), serial))
}

/// Get the serial number associated with the given certificate
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_serial(x509cert: &WrappedX509) -> Result<String> {
    Ok(x509cert.borrow_cert().raw_serial_as_string())
}

/// Return the organization associated with the X509 certificate.
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization(x509cert: &WrappedX509) -> Option<String> {
    get_organization_x509(x509cert.borrow_cert())
}

/// Return the organization associated with the X509 certificate.
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization_x509(x509cert: &X509Certificate<'_>) -> Option<String> {
    x509cert
        .subject
        .iter_organization()
        .filter_map(|a| a.as_str().ok())
        .map(|a| a.to_string())
        .reduce(|cur, next| cur + &next)
}

/// Return the organization associated with the X509 certificate (as bytes).
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization_serial_bytes(cert: &[u8]) -> Result<(Option<String>, String)> {
    let (_, x509cert) = x509_parser::certificate::X509Certificate::from_der(cert)
        .expect("Failed to parse certificate from DER format.");

    get_organization_serial_x509(&x509cert)
}

/// Return the organization unit associated with the X509 certificate.
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization_unit(x509cert: &WrappedX509) -> Option<String> {
    x509cert
        .borrow_cert()
        .subject
        .iter_organizational_unit()
        .filter_map(|a| a.as_str().ok())
        .map(|a| a.to_string())
        .reduce(|cur, next| cur + &next)
}

/// Get a combination of the common names for the given certificate
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_common_name(x509cert: &WrappedX509) -> Option<String> {
    x509cert
        .borrow_cert()
        .subject
        .iter_common_name()
        .filter_map(|a| a.as_str().ok())
        .map(|a| a.to_string())
        .reduce(|cur, next| cur + &next)
}

/// Get the `not_after` field for the valid time period for the given cert
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_not_after(x509cert: &WrappedX509) -> String {
    x509cert.borrow_cert().validity.not_after.to_string()
}

/// This type contains a list of one or more certificates and an associated private key. The leaf
/// certificate should always be first.
pub struct CertKey {
    key: Vec<u8>,
    certificates: Vec<WrappedX509>,
}

#[self_referencing]
#[derive(Debug)]
pub struct WrappedX509 {
    raw_cert: Vec<u8>,

    #[borrows(raw_cert)]
    #[covariant]
    cert: X509Certificate<'this>,
}

fn parse_x509<C>(raw_cert: &C) -> X509Certificate<'_>
where
    C: AsRef<[u8]>,
{
    X509Certificate::from_der(raw_cert.as_ref())
        .expect("Failed to parse certificate from DER format.")
        .1
}

impl Clone for CertKey {
    fn clone(&self) -> Self {
        CertKey {
            key: self.key.clone(),
            certificates: self
                .certificates
                .iter()
                .map(|wrapper| WrappedX509::new(wrapper.borrow_raw_cert().clone(), parse_x509))
                .collect::<Vec<_>>(),
        }
    }
}

impl CertKey {
    /// Create a new `CertKey` given a list of certificates and a private key.
    pub fn new(certificates: Vec<Vec<u8>>, key: Vec<u8>) -> CertKey {
        assert!(
            !certificates.is_empty() && !certificates.first().unwrap().is_empty(),
            "expected a non-empty vector of certificates in CertKey::new"
        );

        CertKey {
            key,
            certificates: certificates
                .into_iter()
                .map(|raw_cert| WrappedX509::new(raw_cert, parse_x509))
                .collect::<Vec<_>>(),
        }
    }

    /// Peek at the leaf certificate.
    pub fn leaf(&self) -> &WrappedX509 {
        // This is safe due to the assertion in creation of a `CertKey`
        &self.certificates[0]
    }

    /// Return the key.
    pub fn key(&self) -> &Vec<u8> {
        &self.key
    }

    /// Return a slice of intermediate certificates. An empty slice means there are none.
    pub fn intermediates(&self) -> Vec<&WrappedX509> {
        self.certificates.iter().skip(1).collect()
    }

    /// Return the organization from the leaf certificate.
    pub fn organization(&self) -> Option<String> {
        get_organization(self.leaf())
    }

    /// Return the serial from the leaf certificate.
    pub fn serial(&self) -> String {
        get_serial(self.leaf()).unwrap()
    }
}

impl WrappedX509 {
    pub fn not_after(&self) -> String {
        self.borrow_cert().validity.not_after.to_string()
    }
}

// hide private key
impl std::fmt::Debug for CertKey {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CertKey")
            .field("X509", &self.leaf())
            .finish()
    }
}

impl std::fmt::Display for CertKey {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let leaf = self.leaf();
        if let Some(cn) = get_common_name(leaf) {
            // Write CN if it exists
            write!(f, "CN: {cn},")?;
        } else if let Some(org_unit) = get_organization_unit(leaf) {
            // CA cert might not have CN, so print its unit name instead
            write!(f, "Org Unit: {org_unit},")?;
        }
        write!(f, ", expire: {}", get_not_after(leaf))
        // ignore the details of the private key
    }
}

impl Hash for CertKey {
    fn hash<H: Hasher>(&self, state: &mut H) {
        for certificate in &self.certificates {
            if let Ok(serial) = get_serial(certificate) {
                serial.hash(state)
            }
        }
    }
}

impl<'a> From<&'a WrappedX509> for CertificateDer<'static> {
    fn from(value: &'a WrappedX509) -> Self {
        CertificateDer::from(value.borrow_raw_cert().as_slice().to_owned())
    }
}


================================================
FILE: pingora-core/src/utils/tls/s2n.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use ouroboros::self_referencing;
use pingora_error::Result;
use std::hash::{Hash, Hasher};
use x509_parser::{
    pem::Pem,
    prelude::{FromDer, X509Certificate},
};

fn get_organization_serial_x509(
    x509cert: &X509Certificate<'_>,
) -> Result<(Option<String>, String)> {
    let serial = x509cert.raw_serial_as_string();
    Ok((get_organization_x509(x509cert), serial))
}

/// Get the serial number associated with the given certificate
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_serial(x509cert: &WrappedX509) -> Result<String> {
    Ok(x509cert.borrow_cert().raw_serial_as_string())
}

/// Return the organization associated with the X509 certificate.
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization(x509cert: &WrappedX509) -> Option<String> {
    get_organization_x509(x509cert.borrow_cert())
}

/// Return the organization associated with the X509 certificate.
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization_x509(x509cert: &X509Certificate<'_>) -> Option<String> {
    x509cert
        .subject
        .iter_organization()
        .filter_map(|a| a.as_str().ok())
        .map(|a| a.to_string())
        .reduce(|cur, next| cur + &next)
}

/// Return the organization associated with the X509 certificate (as bytes).
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization_serial_bytes(cert: &[u8]) -> Result<(Option<String>, String)> {
    let (_, x509cert) = x509_parser::certificate::X509Certificate::from_der(cert)
        .expect("Failed to parse certificate from DER format.");

    get_organization_serial_x509(&x509cert)
}

/// Return the organization unit associated with the X509 certificate.
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_organization_unit(x509cert: &WrappedX509) -> Option<String> {
    x509cert
        .borrow_cert()
        .subject
        .iter_organizational_unit()
        .filter_map(|a| a.as_str().ok())
        .map(|a| a.to_string())
        .reduce(|cur, next| cur + &next)
}

/// Get a combination of the common names for the given certificate
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_common_name(x509cert: &WrappedX509) -> Option<String> {
    x509cert
        .borrow_cert()
        .subject
        .iter_common_name()
        .filter_map(|a| a.as_str().ok())
        .map(|a| a.to_string())
        .reduce(|cur, next| cur + &next)
}

/// Get the `not_after` field for the valid time period for the given cert
/// see https://en.wikipedia.org/wiki/X.509#Structure_of_a_certificate
pub fn get_not_after(x509cert: &WrappedX509) -> String {
    x509cert.borrow_cert().validity.not_after.to_string()
}

/// This type contains a list of one or more certificates and an associated private key. The leaf
/// certificate should always be first.
pub struct CertKey {
    key: Vec<u8>,
    pem: X509Pem,
}

impl CertKey {
    /// Create a new `CertKey` given a list of certificates and a private key.
    pub fn new(pem_bytes: Vec<u8>, key: Vec<u8>) -> CertKey {
        let pem = X509Pem::new(pem_bytes);
        assert!(
            !pem.certs.is_empty(),
            "expected at least one certificate in PEM"
        );

        CertKey { key, pem }
    }

    /// Peek at the leaf certificate.
    pub fn leaf(&self) -> &WrappedX509 {
        // This is safe due to the assertion in creation of a `CertKey`
        &self.pem.certs[0]
    }

    /// Return the key.
    pub fn key(&self) -> &Vec<u8> {
        &self.key
    }

    /// Return a slice of intermediate certificates. An empty slice means there are none.
    pub fn intermediates(&self) -> Vec<&WrappedX509> {
        self.pem.certs.iter().skip(1).collect()
    }

    /// Return the organization from the leaf certificate.
    pub fn organization(&self) -> Option<String> {
        get_organization(self.leaf())
    }

    /// Return the serial from the leaf certificate.
    pub fn serial(&self) -> String {
        get_serial(self.leaf()).unwrap()
    }

    pub fn raw_pem(&self) -> &[u8] {
        &self.pem.raw_pem
    }
}

#[derive(Debug)]
pub struct X509Pem {
    pub raw_pem: Vec<u8>,
    pub certs: Vec<WrappedX509>,
}

impl X509Pem {
    pub fn new(raw_pem: Vec<u8>) -> Self {
        let certs = Pem::iter_from_buffer(&raw_pem)
            .map(|part| {
                let raw_cert = part.expect("Failed to parse PEM").contents;
                WrappedX509::new(raw_cert, parse_x509)
            })
            .collect();
        X509Pem { raw_pem, certs }
    }

    pub fn iter(&self) -> std::slice::Iter<'_, WrappedX509> {
        self.certs.iter()
    }
}

fn parse_x509<C>(raw_cert: &C) -> X509Certificate<'_>
where
    C: AsRef<[u8]>,
{
    X509Certificate::from_der(raw_cert.as_ref())
        .expect("Failed to parse certificate from DER format.")
        .1
}

#[self_referencing]
#[derive(Debug)]
pub struct WrappedX509 {
    raw_cert: Vec<u8>,

    #[borrows(raw_cert)]
    #[covariant]
    cert: X509Certificate<'this>,
}

impl WrappedX509 {
    pub fn not_after(&self) -> String {
        self.borrow_cert().validity.not_after.to_string()
    }
}

// hide private key
impl std::fmt::Debug for CertKey {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("CertKey")
            .field("X509", &self.leaf())
            .finish()
    }
}

impl std::fmt::Display for CertKey {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let leaf = self.leaf();
        if let Some(cn) = get_common_name(leaf) {
            // Write CN if it exists
            write!(f, "CN: {cn},")?;
        } else if let Some(org_unit) = get_organization_unit(leaf) {
            // CA cert might not have CN, so print its unit name instead
            write!(f, "Org Unit: {org_unit},")?;
        }
        write!(f, ", expire: {}", get_not_after(leaf))
        // ignore the details of the private key
    }
}

impl Hash for X509Pem {
    fn hash<H: Hasher>(&self, state: &mut H) {
        for certificate in &self.certs {
            if let Ok(serial) = get_serial(certificate) {
                serial.hash(state)
            }
        }
    }
}

impl Hash for CertKey {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.pem.hash(state)
    }
}


================================================
FILE: pingora-core/tests/certs/alt-ca.crt
================================================
-----BEGIN CERTIFICATE-----
MIIFzzCCA7egAwIBAgIUdmTkBmGw2cEQiP+uCa1TuTBp1aYwDQYJKoZIhvcNAQEL
BQAwdzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAlNDMQ0wCwYDVQQHDARDaGFzMRAw
DgYDVQQKDAdUaGUgT3JnMQ4wDAYDVQQLDAVBZG1pbjEPMA0GA1UEAwwGT3JnLUNB
MRkwFwYJKoZIhvcNAQkBFgpvcmdAY2EuY29tMB4XDTI1MDgwNjA0MDc0NFoXDTM1
MDgwNDA0MDc0NFowdzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAlNDMQ0wCwYDVQQH
DARDaGFzMRAwDgYDVQQKDAdUaGUgT3JnMQ4wDAYDVQQLDAVBZG1pbjEPMA0GA1UE
AwwGT3JnLUNBMRkwFwYJKoZIhvcNAQkBFgpvcmdAY2EuY29tMIICIjANBgkqhkiG
9w0BAQEFAAOCAg8AMIICCgKCAgEAtD+U6aTiu5c9nhACCfESx13zRZ+n3dzZuRac
sX/PrFbpAI0zsLh0ohaXLNnJuPHBSjUNhxSFZeWBuQA7mp69ZoZ8CtzjADR0EML1
KLgvy7BHTH8Oe1MCyqRLJm8pcHh4AnBF02eN+71pfsfkXrr/hNlOnPJAbCpU7Rfz
Wq+w1oeGeC8h+RSgiY/1o28ELRs8SzkQGwlu6WezXEZuq6609c7pmevDD9t6snYx
2A0ON/QgwhjcVyuiFRe9tKovzNkRmROkbfYgINCnYOuTxn+dWW3zmCYVHJYA+FWJ
xwOp86rIeCIk+snL6+pL/M0s1+E2szwY+yWmw6q4NlymyGVpVsF6+vsMX/7JG9q1
hGOFD2Nbu7Qs5OlC7+k1m+fwcTGm74dlbkNIjUguMsfyCGT0vHGZ7JfdGINvOWgP
nOuqyTlkj4GVehlBA37S26007bqdtLrkOxrymFwOpoJuYC7HeuFIk1RX2Lfh+6xQ
h5KVjRdzYfisuegqJPwLpA9YjRBzCCcInmnzsZAkur3/9wXHUcoNP1NVEDWHaCxi
jTRvqJazCBqjLxT9doRjTeuKj/RcHkk79IgR6Oiz2De9AwHSp4+NKngrerdissMH
TcBpO5entMnp6r/IkysMDWSrM6JZK66g3ltxiDWyjlD+BDB/VECdyN1RquQ0ONje
2gYzp/0CAwEAAaNTMFEwHQYDVR0OBBYEFOa/SvZE+jE5JMwxuyIuHTkTXzCLMB8G
A1UdIwQYMBaAFOa/SvZE+jE5JMwxuyIuHTkTXzCLMA8GA1UdEwEB/wQFMAMBAf8w
DQYJKoZIhvcNAQELBQADggIBAHfAOu4dnsYomHFG+jAurG3LG/TPdxiv0g96oJ6h
FwhcbKWcFXWAQckzMloVAymatIqUsfXFlMPhadD51AG6BcgmD7i3co/Gh0o3XUCG
GL6cyacsXPAIuCbqUp4Wgs10BR3ELNrf3ksTQGU58g20KodSmHr9ttSeDpK7x2tA
NS/fy/T1lFsRiLQi7bYb3KljnSTllJ3TIexd2gHaS7Varr/N6F9DJT1qMs8xCIlz
gTpJcC/X2fSWaa9IPg9WCzycJjSMoVAw8xfh6Lu9YweKNW7/2eGmZlhpm5MGFV3+
G9UPBayPz7sFoVJaKwr4tE027r+sJddLa4S4GJMW+N5+eVmoQuzZ4C3W/R7LPHTz
Kq+JevifKqUJ5EmuSGAtHa1LfFa2T636lfxfZu4k4JT4H7/58RKJ1X5DYNd3IdGG
b4mg1dmX7JwP2msV92x2ywfIzoFK7ByKHvAOq8inI099y0IMtZGElkv6RMD2OpMl
tWFjIn++VGXNeHdwcWxAzx29q+jqBpzgH9KVP2io8M1X5j+TNh5guTzKLpH0bbPo
Wvr2szsV9+jUj+z49Aik8OjjMsMyfSVRcMU54ZvmkP0VCSkejWf2ermoX9XGL0Q5
KqyzbLSKFbtVtAnqFjqYdwYP+0729xIFO54IETBDjhk66s9irBibanuatfG5ezXT
33ZP
-----END CERTIFICATE-----


================================================
FILE: pingora-core/tests/certs/alt-server.crt
================================================
-----BEGIN CERTIFICATE-----
MIIEsTCCApmgAwIBAgIUcEJNjZRw1qumRzsbm9HSdXyCojkwDQYJKoZIhvcNAQEL
BQAwdzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAlNDMQ0wCwYDVQQHDARDaGFzMRAw
DgYDVQQKDAdUaGUgT3JnMQ4wDAYDVQQLDAVBZG1pbjEPMA0GA1UEAwwGT3JnLUNB
MRkwFwYJKoZIhvcNAQkBFgpvcmdAY2EuY29tMB4XDTI1MDgwNjA0MTMwMloXDTI2
MDgwNjA0MTMwMlowajELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAlNDMQwwCgYDVQQH
DANDSFMxDDAKBgNVBAoMA09yZzEMMAoGA1UECwwDb3JnMQwwCgYDVQQDDANvcmcx
FjAUBgkqhkiG9w0BCQEWB29yZ0BvcmcwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAw
ggEKAoIBAQDhAN58oAxwa0C34q72BgyFZKa6NlvIZrr+0DaCpILUYAifP5v4RCAI
cf4MAspcQWizzz14aBaV7s4MRBlPv8b6C0m1Acld+C1x3KwjWRfQyIb9LTOQBx+q
b3SkqVvTXeJXqd38cIBXQ4zirHmJ1+m2CULYUoIeZZfeYU/+gv84Q+sRpQce/+wu
0MvsXw3t+3DJNRsC+EkDiZyiBdZb2ochriVMT6IvQVlCB36nQUcPAtBPs5UcfVn0
RenaBdXgzE4CYvyg1qcwhw5mve1gHtsFMuqZTArUlSlUz6YJQKCFkO83IISwUwx7
wKSswN38WIKKsYLti0xReuh7aS8o0kBjAgMBAAGjQjBAMB0GA1UdDgQWBBSC3ZGf
YULa784DsMjCBbSInuHogDAfBgNVHSMEGDAWgBTmv0r2RPoxOSTMMbsiLh05E18w
izANBgkqhkiG9w0BAQsFAAOCAgEAd6OptRoJSZXcTtYinF0LhaNCn5+kKsW0I487
JoPPIgQLXKOT8PkkGYY+p8nucm4zYyp+2Z6CayxfrWORDylteT1cpVgwFw39KqU2
FuGa4nAOV9BTIBer4oUVTS2flkkMnHlDIUKE+yONE5wOyRa/jvQWfMl9bfN0yRAh
e/72YEszBADrQlpWUvyu6Uv3cNi2XPbcty3VSNHkPWs1lHlwY9s2csnSLQpFMN+A
Wq1RADLKWfR9mrDEmzx0V5JIOqY2K0804jpbnD/fkjyIFBmRIUETN+MU8PQdp0W6
8cBh7u9L5UoUweRr/cZFOd0jHJLiCpClXyFOHXsNkT7jN/hcbXRPwSTD6+GY3Opz
Dn3lZlaAbLg+NtHVDageTX2QJ6H/HVpVGxDltM3hiMrub7PTHCG6GyyWKII3wcXd
875+EqHMwdRYnHb9jtA20GDeG+NGQ5IUJvMPHivXFWbidV5YXyR1t1UP6HEHRU0D
3i4xYJXJTlA3gCUpfsOLm/0lXg+cLqwaqtXZ3vFViUHE02CE5PZN4QZ1ggAFldLx
HEbjzWdDMR0Qy4g/DsfRss9ve0V/te3F6EjXLf5Cra+5wAWm0xUqLiky5HtND5+q
Ac9j//3tcahDgigl2xHakTA/G4xcUM6IV5SxoRUeYQqJ8mHorQyoEkgED6LUyLYa
Nq0MAHA=
-----END CERTIFICATE-----


================================================
FILE: pingora-core/tests/certs/ca.crt
================================================
-----BEGIN CERTIFICATE-----
MIIFazCCA1OgAwIBAgIUcvULnZbENoxhjvv5TARdWr24pXcwDQYJKoZIhvcNAQEL
BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM
GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yNTA4MDQxNzIyMjVaFw0zNTA4
MDIxNzIyMjVaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggIiMA0GCSqGSIb3DQEB
AQUAA4ICDwAwggIKAoICAQC9wyQZCE4DA4fpuufOofpImk4L2y2ubW9SmkzwHypD
KtcQ1yx9bIdHIG9RWoSvDYB+d3mS8uiolkq631anrp2gtiuRchiMJc3kRkzALT9O
HQfebe4lU3V9EanikEX3xhQBzfeqmw6LUWtwRMqR+MvRTOMy1bM61jvTXYY5cBVz
Qrz8/FLSXflamkd651mjCVhrXginLqmYUWuVjftn9R3wml4yoUKc7UIyM1j4dB50
nVrTgILMvfukreDfMu4l48rf8g0+usvT8ROmTum1J3kkCVqGUqChwC2I8kPN8BPQ
CJQgfJFPPplzfoyyLWHBFHXTWNP3ec9m3GCQfyCgzI1kEBjVYr6C3Ukb214N/1dr
P7sh3OHW64pxGy72/RWg5CsDFPs7t3RijJB5282LpxLzbFDviIFp6C1Kg/XRv88M
L5NhXfdqx0MC8Glj9AjGIrmLvB2d+PqKfpHe03eC0RgYGO59K4dwKUEs+P2uYcKm
0yFoOsaQyCpNAbBc6B1rawmJRHCVt8Bi/CLAsWl5N3Dq7TOxDxU1gTcvZxRC40/x
aS89PvMmk4lUP+ueKvsQ3Qwx3+um/Cf7dvlWk3XxljUQTCqKS70XdkOlq82Xyugt
6NyztBQwd56Ms1qoyy3Jx5f2Vvvdp8e0LtxhUliRiuKemPb++uec7uZM6BVZL5YL
yQIDAQABo1MwUTAdBgNVHQ4EFgQUiTft2tOcFNM8OuQQdcfCbfqU1qIwHwYDVR0j
BBgwFoAUiTft2tOcFNM8OuQQdcfCbfqU1qIwDwYDVR0TAQH/BAUwAwEB/zANBgkq
hkiG9w0BAQsFAAOCAgEAI9IA+2QYhZGHNZFiCUClidbinotg51q/+SXz4TOUgxQx
trB+f/247pNUJmBZ++16lfTakh6CCzItgzXWgsI64fzJRQBfYuAJrAb2ApMMk2+l
48wISakDIoXnntokOtQslaoFr5jSG0C3J9CrmHcMk0Z0NTYr0ltMWbkEeFhv1KZH
X4CESm+5D706cXkEzyN4sXFDf68OyJQehlxKCJuvZuO0+DWaFzsKd1wRmlybk8n/
+b63gKwT9ydKdW4ZgidCXwh+Y5trqVeqzWBmxHX1207WjvaHggCI7si1bajiwgNu
CnhfUlueawIKZBiliZgpHYuYkDyiB9NdE3twcoIAhKGcpTTd9hP4i4majq/M+hzi
iox8fHg4HoI7l/cp0LiWYZjoIoaH10Nohn0BLqSDIrigxdrhbF2FNgjWyxjL9HMg
koTaZ7ji2J2ygqINyEBdunwHhC5SUPsLL5sWEHK19PDerJoU0xcbDR3s+KuMXCIG
f9kVZt54DECl9TL70MRzRHnoNkMriLgZiiAMqeTlUOPV3GbWc2G+YEFv3xFhYqY0
ZyW5CvMSYaNpEoe1xz9QNkK6i6dwR29Da8QAYhJPOJaFXDwIsh/telsoMeJyO7Ql
HaMEMOZIu9SaxWYDBwpnZ959VSj/APSSv2d6dIaRE3XmxXt6xK2FGw50HimCsHU=
-----END CERTIFICATE-----


================================================
FILE: pingora-core/tests/certs/server.crt
================================================
-----BEGIN CERTIFICATE-----
MIIEWjCCAkKgAwIBAgIUcEJNjZRw1qumRzsbm9HSdXyCojcwDQYJKoZIhvcNAQEL
BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM
GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yNTA4MDYwMzQ2MDJaFw0yNjA4
MDYwMzQ2MDJaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw
HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggEiMA0GCSqGSIb3DQEB
AQUAA4IBDwAwggEKAoIBAQCyUoXpICzkly7iNIJYRi310ZbDufM+HuGEhh0TR/j0
TbzunarlSJiDBvJ8QgYMc+SeNdIfE2eEd0yV9/Ukhm8eLTr5ti1lWQKZH85JVnxr
V9fo3eE9OfxpEDgdlt/wkHfmYr/th8ajwMzoUkC5uY+POex0zGBt2eEKr4V9KNI4
3JQKshpWYRaOvfufNZXN5+yivzkON5zE7zPr+BLZdiG4nDhnpkcrvoo6sWnu2Pcv
bbtXJIa41iZWrt0zSEBE5q5Ci3ZFHow5f+AQuEi2E4XsDQv3emFjZ4jvT/deKWgn
und4JGD0PaxCj7mE1DBqmqHDDyxOyHA/L+KzGTR/Yws9AgMBAAGjQjBAMB0GA1Ud
DgQWBBSZ/Fkj1buTR2uj1nw+mWJhrDQvSzAfBgNVHSMEGDAWgBSJN+3a05wU0zw6
5BB1x8Jt+pTWojANBgkqhkiG9w0BAQsFAAOCAgEANqctLxVkBvIJ07W5gzLMKDa2
NPSeN++07A0Abvi4ImXh+yeDdI59uDlbkIG+g1C2zOsyO837dYlAryX1NQ/sfGTt
Dz3Yu1pgk3eFS/Bz7dOFbADWkKX6NdAY5nC5MjQx1hIAnEz0LNp0xyW7WVASoUcO
WJlM0CWBBmFLMWp9FWLD4xYY+hCl4VSu1+I1vktn5vuUfhCX+0e4L/tV1+FNVu9I
odyXmQizreEBXTZvHKYdHCBGnzY7BS/RjA2j/xDT6XH0QU1LA0tu8sAiojcNplcT
HpKS/hLa8OjSPitgHPm0Ce7mzjqTF9H0IZLx78HfeGe4kbitp3iGVcGh9r4AMBX/
HBpQxSEiMDBpPdHLvT9r1+NwScwIOxjberJ1TA3NhaMTFXXji0zpP4hhUt9pG/lQ
FM1HoXM2f53g+m0rHiF1zUPUhpqfd40ktmA+DyPpgxalTNYNjTT3VH9nXYoSVD+6
LW8sVbAMDL9OKZLrXazTYGC1DGAGlFK0e8gjP1zI9sTw21tZlkNvs1syp+Kxq+uv
OOEF0iUXVEMdGpSSfT+P0htYlZvQdrc33m3+ROYuZgS8DKUqgWn1QuKz8G1DcnuG
Np+etROWMf0mKDsOvuVUy9OJuOqWo/rOE3m9sZKkzGdA3sQXdHFtGMp3vqeArUlF
l62Qf0ApIWPdmX8n21U=
-----END CERTIFICATE-----


================================================
FILE: pingora-core/tests/certs/server.key
================================================
-----BEGIN PRIVATE KEY-----
MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCyUoXpICzkly7i
NIJYRi310ZbDufM+HuGEhh0TR/j0TbzunarlSJiDBvJ8QgYMc+SeNdIfE2eEd0yV
9/Ukhm8eLTr5ti1lWQKZH85JVnxrV9fo3eE9OfxpEDgdlt/wkHfmYr/th8ajwMzo
UkC5uY+POex0zGBt2eEKr4V9KNI43JQKshpWYRaOvfufNZXN5+yivzkON5zE7zPr
+BLZdiG4nDhnpkcrvoo6sWnu2PcvbbtXJIa41iZWrt0zSEBE5q5Ci3ZFHow5f+AQ
uEi2E4XsDQv3emFjZ4jvT/deKWgnund4JGD0PaxCj7mE1DBqmqHDDyxOyHA/L+Kz
GTR/Yws9AgMBAAECggEADoo/jIF435+7LSsiaK+6PiW7kRSHsqxCb7Oeycxvzn3L
Nrqo7V6kvuRRX9PjWd8WSFcznaCPq4OtvTm1ZafHhjKicSuLWozuMw2enKi+ZuNI
Ad8bp0oj3G474R/E/UDOYfzx0NymFAKbwqK4T9yDSearct+aSkK+gIhMzmaGc6fX
tmqH0MxsLXmpkdMFL5WNU3IvrJvdYAtSJ+Tqq/K8ifkCxUzVxSav1Msd0wXxyWJU
hyU8WzFRzotAguqGu5VQOKUhjzeOC5uoJWcor0OSf8CvOUIGR0orINLf1BaHCGY8
3cIcY7WqTOeVR/q3IxGlDO+0aoMvQSR/BzIw42hmwQKBgQDqDWhnb7lbkaV3uX2l
perMozoa/ycmdvRwAZJqBdHoV1w1nnxjTU/IaVXXL9Fj8gRyJ3YXImfKTFE+Z5cU
7RLAPc+7DKLEMEWBwIzbbFZ+ywEAt1xNqRlLsEJd1UCXsmtF67zeYg6Msd4ckaHe
DIv5qryl/DqUT4t7emX2iJGuwQKBgQDDC0binK0e/HsLbN7HG3xbV6GdsP+HKPlz
RE1R4g3DO4uYd24hJFF2kdkngD9PuVbxIqmDb7C40V2/19KTsKToMBiBGv3b1mne
EJVLwdn5hSOE7G/gJDSQuUAV+Tujg3b3lPTsOaygLbyWNEQBcJh0KRnxOi1Kefvl
aOhRfkV3fQKBgBFTCr5VS8AWaMwS49UGEfoxvtROvKQhO/iqdR757U6oYL/rSkPD
bjtkaKEz/ejK+j9E4n3V4x7bRUw8OLeo0LGAIcczqTyiYhK3oPWA8GoUNq/J4sAw
2xl6I390kIJqB3y2dVV0pqUNaWZt9TBNd3L0i2Ax6lgeBzINnkyAUWBBAoGAEf/Y
atFKqLFkKYnChV1j/In5wDO1YSPG4XxMJmJWIs4787YR070mR2ruP1b2gMT54Qbx
3c9Q371yiWHBbR/AGC1YFZIIG2GOI5AkNvmMxBolTP8E1AqDT1fJMj3t4wke0XpN
n/8yjxWpcbMhE4DwkMe6PSjBRT48oM8toVel0YECgYB6CZrjB6R8C2N6MMJ1HNoF
i1q5VvxaPZ6s7pf0jKh7NTjqh+FGcBOn5ixA3PZAomphHwZDzuwROeaMVzjEJMA/
LX9Bq1beElyTYVINhL9C646D+DJtRa8fElgQBNuaSJJOaP9a2rh4PI9xgn4WLYrA
0xxdIR87iqrUFI11Nm2Dog==
-----END PRIVATE KEY-----


================================================
FILE: pingora-core/tests/keys/key.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIN5lAOvtlKwtc/LR8/U77dohJmZS30OuezU9gL6vmm6DoAoGCCqGSM49
AwEHoUQDQgAE2f/1Fm1HjySdokPq2T0F1xxol9nSEYQ+foFINeaWYk+FxMGpriJT
Bb8AGka87cWklw1ZqytfaT6pkureDbTkwg==
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-core/tests/keys/public.pem
================================================
-----BEGIN PUBLIC KEY-----
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE2f/1Fm1HjySdokPq2T0F1xxol9nS
EYQ+foFINeaWYk+FxMGpriJTBb8AGka87cWklw1ZqytfaT6pkureDbTkwg==
-----END PUBLIC KEY-----


================================================
FILE: pingora-core/tests/keys/server.crt
================================================
-----BEGIN CERTIFICATE-----
MIIB9zCCAZ2gAwIBAgIUMI7aLvTxyRFCHhw57hGt4U6yupcwCgYIKoZIzj0EAwIw
ZDELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNp
c2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxFjAUBgNVBAMMDW9wZW5ydXN0
eS5vcmcwHhcNMjIwNDExMjExMzEzWhcNMzIwNDA4MjExMzEzWjBkMQswCQYDVQQG
EwJVUzELMAkGA1UECAwCQ0ExFjAUBgNVBAcMDVNhbiBGcmFuY2lzY28xGDAWBgNV
BAoMD0Nsb3VkZmxhcmUsIEluYzEWMBQGA1UEAwwNb3BlbnJ1c3R5Lm9yZzBZMBMG
ByqGSM49AgEGCCqGSM49AwEHA0IABNn/9RZtR48knaJD6tk9BdccaJfZ0hGEPn6B
SDXmlmJPhcTBqa4iUwW/ABpGvO3FpJcNWasrX2k+qZLq3g205MKjLTArMCkGA1Ud
EQQiMCCCDyoub3BlbnJ1c3R5Lm9yZ4INb3BlbnJ1c3R5Lm9yZzAKBggqhkjOPQQD
AgNIADBFAiAjISZ9aEKmobKGlT76idO740J6jPaX/hOrm41MLeg69AIhAJqKrSyz
wD/AAF5fR6tXmBqlnpQOmtxfdy13wDr4MT3h
-----END CERTIFICATE-----


================================================
FILE: pingora-core/tests/keys/server.csr
================================================
-----BEGIN CERTIFICATE REQUEST-----
MIIBJzCBzgIBADBsMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEW
MBQGA1UEBwwNU2FuIEZyYW5jaXNjbzEYMBYGA1UECgwPQ2xvdWRmbGFyZSwgSW5j
MRYwFAYDVQQDDA1vcGVucnVzdHkub3JnMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcD
QgAE2f/1Fm1HjySdokPq2T0F1xxol9nSEYQ+foFINeaWYk+FxMGpriJTBb8AGka8
7cWklw1ZqytfaT6pkureDbTkwqAAMAoGCCqGSM49BAMCA0gAMEUCIFyDN8eamnoY
XydKn2oI7qImigxahyCftzjxkIEV5IKbAiEAo5l72X4U+YTVYmyPPnJIj2v5nA1R
RuUfMh5sXzwlwuM=
-----END CERTIFICATE REQUEST-----


================================================
FILE: pingora-core/tests/nginx.conf
================================================

#user  nobody;
worker_processes  1;

error_log  /dev/stdout;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

pid        logs/nginx.pid;
master_process off;
daemon off;

events {
    worker_connections  4096;
}


http {
    #include       mime.types;
    #default_type  application/octet-stream;

    #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
    #                  '$status $body_bytes_sent "$http_referer" '
    #                  '"$http_user_agent" "$http_x_forwarded_for"';

    # access_log  logs/access.log  main;
    access_log  off;

    sendfile        on;
    #tcp_nopush     on;

    #keepalive_timeout  0;
    keepalive_timeout  10;
    keepalive_requests 99999;

    #gzip  on;

    server {
        listen       8000;
        listen       [::]:8000;
        listen       8443 ssl http2;
        #listen       8443 ssl http2;
        server_name  localhost;

        ssl_certificate keys/server.crt;
        ssl_certificate_key keys/key.pem;
        ssl_protocols TLSv1.2;
        ssl_ciphers TLS-AES-128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256;

        #charset koi8-r;

        #access_log  logs/host.access.log  main;

        location / {
            root   /home/yuchen/nfs/tmp;
            index  index.html index.htm;
        }
        location /test {
            keepalive_timeout 20;
            return 200;
        }
        location /test2 {
            keepalive_timeout 0;
            return 200 "hello world";
        }
        location /test3 {
            keepalive_timeout 0;
            return 200;
            #content_by_lua_block {
            #    ngx.print("hello world")
            #}
        }

        location /test4 {
            keepalive_timeout 20;
            rewrite_by_lua_block {
                ngx.exit(200)
            }
            #return 201;

        }

        #error_page  404              /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }
    }
}


================================================
FILE: pingora-core/tests/nginx_proxy.conf
================================================

#user  nobody;
worker_processes 1;

error_log  /dev/stdout;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

#pid        logs/nginx.pid;
master_process off;
daemon off;

events {
    worker_connections  4096;
}


http {
    #include       mime.types;
    #default_type  application/octet-stream;

    #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
    #                  '$status $body_bytes_sent "$http_referer" '
    #                  '"$http_user_agent" "$http_x_forwarded_for"';

    # access_log  logs/access.log  main;
    access_log  off;

    sendfile        on;
    #tcp_nopush     on;

    keepalive_timeout  30;
    keepalive_requests 99999;

    upstream plaintext {
        server 127.0.0.1:8000;
        keepalive 128;
        keepalive_requests 99999;
    }

    upstream ssl {
        server 127.0.0.1:8443;
        keepalive 128;
        keepalive_requests 99999;
    }

    #gzip  on;

    server {
        listen       8001;
        listen       [::]:8001;
        server_name  localproxy;

        location / {
            keepalive_timeout 30;
            proxy_pass http://plaintext;
            proxy_http_version 1.1;
            proxy_set_header Connection "Keep-Alive";
        }

    }

    server {
        listen       8002 ssl;
        listen       [::]:8002 ssl;
        server_name  localproxy_https;

        ssl_certificate keys/server.crt;
        ssl_certificate_key keys/key.pem;
        ssl_protocols TLSv1.2;
        ssl_ciphers TLS-AES-128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256;

        location / {
            keepalive_timeout 30;
            proxy_pass https://ssl;
            proxy_http_version 1.1;
            proxy_ssl_session_reuse off;
            proxy_ssl_verify on;
            proxy_ssl_server_name on;
            proxy_ssl_name "openrusty.org";
            proxy_ssl_trusted_certificate keys/server.crt;
            proxy_set_header Connection "Keep-Alive";
        }

    }
}


================================================
FILE: pingora-core/tests/pingora_conf.yaml
================================================
---
version: 1
client_bind_to_ipv4:
    - 127.0.0.2
ca_file: tests/keys/server.crt

================================================
FILE: pingora-core/tests/server_phase_fastshutdown.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: This test sends a shutdown signal to itself,
// so it needs to be in an isolated test to prevent concurrency.

use pingora_core::server::{ExecutionPhase, RunArgs, Server};

// Ensure that execution phases are reported correctly.
#[test]
fn test_server_execution_phase_monitor_fast_shutdown() {
    let mut server = Server::new(None).unwrap();

    let mut phase = server.watch_execution_phase();

    let join = std::thread::spawn(move || {
        server.bootstrap();
        server.run(RunArgs::default());
    });

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::Bootstrap
    ));
    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::BootstrapComplete,
    ));
    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::Running,
    ));

    // Need to wait for startup, otherwise the signal handler is not
    // installed yet.
    //
    // TODO: signal handlers are installed after Running phase
    // message is sent, sleep for now to avoid test flake
    std::thread::sleep(std::time::Duration::from_millis(500));

    unsafe {
        libc::raise(libc::SIGINT);
    }

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::ShutdownStarted,
    ));

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::ShutdownRuntimes,
    ));

    join.join().unwrap();

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::Terminated,
    ));
}


================================================
FILE: pingora-core/tests/server_phase_gracefulshutdown.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: This test sends a shutdown signal to itself,
// so it needs to be in an isolated test to prevent concurrency.

use pingora_core::server::{configuration::ServerConf, ExecutionPhase, RunArgs, Server};

// Ensure that execution phases are reported correctly.
#[test]
fn test_server_execution_phase_monitor_graceful_shutdown() {
    let conf = ServerConf {
        // Use small timeouts to speed up the test.
        grace_period_seconds: Some(1),
        graceful_shutdown_timeout_seconds: Some(1),
        ..Default::default()
    };
    let mut server = Server::new_with_opt_and_conf(None, conf);

    let mut phase = server.watch_execution_phase();

    let join = std::thread::spawn(move || {
        server.bootstrap();
        server.run(RunArgs::default());
    });

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::Bootstrap
    ));
    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::BootstrapComplete,
    ));
    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::Running,
    ));

    // Need to wait for startup, otherwise the signal handler is not
    // installed yet.
    //
    // TODO: signal handlers are installed after Running phase
    // message is sent, sleep for now to avoid test flake
    std::thread::sleep(std::time::Duration::from_millis(500));

    unsafe {
        libc::raise(libc::SIGTERM);
    }

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::GracefulTerminate,
    ));

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::ShutdownStarted,
    ));

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::ShutdownGracePeriod,
    ));

    assert!(matches!(
        dbg!(phase.blocking_recv().unwrap()),
        ExecutionPhase::ShutdownRuntimes,
    ));

    join.join().unwrap();

    assert!(matches!(
        phase.blocking_recv().unwrap(),
        ExecutionPhase::Terminated,
    ));
}


================================================
FILE: pingora-core/tests/test_basic.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod utils;

#[cfg(all(unix, feature = "any_tls"))]
use hyperlocal::{UnixClientExt, Uri};

#[tokio::test]
async fn test_http() {
    utils::init();
    let res = reqwest::get("http://127.0.0.1:6145").await.unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_https_http2() {
    utils::init();

    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .build()
        .unwrap();

    let res = client.get("https://127.0.0.1:6146").send().await.unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);

    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .http1_only()
        .build()
        .unwrap();

    let res = client.get("https://127.0.0.1:6146").send().await.unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_11);
}

#[cfg(unix)]
#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_uds() {
    utils::init();
    let url = Uri::new("/tmp/echo.sock", "/").into();
    let client = hyper::Client::unix();

    let res = client.get(url).await.unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
}


================================================
FILE: pingora-core/tests/utils/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use once_cell::sync::Lazy;
use std::{thread, time};

use clap::Parser;
use pingora_core::listeners::Listeners;
use pingora_core::server::configuration::Opt;
use pingora_core::server::Server;
use pingora_core::services::listening::Service;

use async_trait::async_trait;
use bytes::Bytes;
use http::{Response, StatusCode};
use pingora_timeout::timeout;
use std::time::Duration;

use pingora_core::apps::http_app::ServeHttp;
use pingora_core::protocols::http::ServerSession;

#[derive(Clone)]
pub struct EchoApp;

#[async_trait]
impl ServeHttp for EchoApp {
    async fn response(&self, http_stream: &mut ServerSession) -> Response<Vec<u8>> {
        // read timeout of 2s
        let read_timeout = 2000;
        let body = match timeout(
            Duration::from_millis(read_timeout),
            http_stream.read_request_body(),
        )
        .await
        {
            Ok(res) => match res.unwrap() {
                Some(bytes) => bytes,
                None => Bytes::from("no body!"),
            },
            Err(_) => {
                panic!("Timed out after {:?}ms", read_timeout);
            }
        };

        Response::builder()
            .status(StatusCode::OK)
            .header(http::header::CONTENT_TYPE, "text/html")
            .header(http::header::CONTENT_LENGTH, body.len())
            .body(body.to_vec())
            .unwrap()
    }
}

pub struct MyServer {
    // Maybe useful in the future
    #[allow(dead_code)]
    pub handle: thread::JoinHandle<()>,
}

fn entry_point(opt: Option<Opt>) {
    env_logger::init();

    let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
    let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));

    let mut my_server = Server::new(opt).unwrap();
    my_server.bootstrap();

    let mut listeners = Listeners::tcp("0.0.0.0:6145");
    #[cfg(unix)]
    listeners.add_uds("/tmp/echo.sock", None);

    let mut tls_settings =
        pingora_core::listeners::tls::TlsSettings::intermediate(&cert_path, &key_path).unwrap();
    tls_settings.enable_h2();
    listeners.add_tls_with_settings("0.0.0.0:6146", None, tls_settings);

    let echo_service_http =
        Service::with_listeners("Echo Service HTTP".to_string(), listeners, EchoApp);

    my_server.add_service(echo_service_http);
    my_server.run_forever();
}

impl MyServer {
    pub fn start() -> Self {
        let opts: Vec<String> = vec![
            "pingora".into(),
            "-c".into(),
            "tests/pingora_conf.yaml".into(),
        ];
        let server_handle = thread::spawn(|| {
            entry_point(Some(Opt::parse_from(opts)));
        });
        // wait until the server is up
        thread::sleep(time::Duration::from_secs(2));
        MyServer {
            handle: server_handle,
        }
    }
}

pub static TEST_SERVER: Lazy<MyServer> = Lazy::new(MyServer::start);

pub fn init() {
    let _ = *TEST_SERVER;
}


================================================
FILE: pingora-error/Cargo.toml
================================================
[package]
name = "pingora-error"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["rust-patterns"]
keywords = ["error", "error-handling", "pingora"]
description = """
Error types and error handling APIs for Pingora.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_error"
path = "src/lib.rs"


================================================
FILE: pingora-error/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-error/src/immut_str.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;

/// A data struct that holds either immutable string or reference to static str.
/// Compared to String or `Box<str>`, it avoids memory allocation on static str.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ImmutStr {
    Static(&'static str),
    Owned(Box<str>),
}

impl ImmutStr {
    #[inline]
    pub fn as_str(&self) -> &str {
        match self {
            ImmutStr::Static(s) => s,
            ImmutStr::Owned(s) => s.as_ref(),
        }
    }

    pub fn is_owned(&self) -> bool {
        match self {
            ImmutStr::Static(_) => false,
            ImmutStr::Owned(_) => true,
        }
    }
}

impl fmt::Display for ImmutStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.as_str())
    }
}

impl From<&'static str> for ImmutStr {
    fn from(s: &'static str) -> Self {
        ImmutStr::Static(s)
    }
}

impl From<String> for ImmutStr {
    fn from(s: String) -> Self {
        ImmutStr::Owned(s.into_boxed_str())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_static_vs_owned() {
        let s: ImmutStr = "test".into();
        assert!(!s.is_owned());
        let s: ImmutStr = "test".to_string().into();
        assert!(s.is_owned());
    }
}


================================================
FILE: pingora-error/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![warn(clippy::all)]
//! The library to provide the struct to represent errors in pingora.

pub use std::error::Error as ErrorTrait;
use std::fmt;
use std::fmt::Debug;
use std::result::Result as StdResult;

mod immut_str;
pub use immut_str::ImmutStr;

/// The boxed [Error], the desired way to pass [Error]
pub type BError = Box<Error>;
/// Syntax sugar for `std::Result<T, BError>`
pub type Result<T, E = BError> = StdResult<T, E>;

/// The struct that represents an error
#[derive(Debug)]
pub struct Error {
    /// the type of error
    pub etype: ErrorType,
    /// the source of error: from upstream, downstream or internal
    pub esource: ErrorSource,
    /// if the error is retry-able
    pub retry: RetryType,
    /// chain to the cause of this error
    pub cause: Option<Box<dyn ErrorTrait + Send + Sync>>,
    /// an arbitrary string that explains the context when the error happens
    pub context: Option<ImmutStr>,
}

/// The source of the error
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ErrorSource {
    /// The error is caused by the remote server
    Upstream,
    /// The error is caused by the remote client
    Downstream,
    /// The error is caused by the internal logic
    Internal,
    /// Error source unknown or to be set
    Unset,
}

/// Whether the request can be retried after encountering this error
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum RetryType {
    Decided(bool),
    ReusedOnly, // only retry when the error is from a reused connection
}

impl RetryType {
    pub fn decide_reuse(&mut self, reused: bool) {
        if matches!(self, RetryType::ReusedOnly) {
            *self = RetryType::Decided(reused);
        }
    }

    pub fn retry(&self) -> bool {
        match self {
            RetryType::Decided(b) => *b,
            RetryType::ReusedOnly => {
                panic!("Retry is not decided")
            }
        }
    }
}

impl From<bool> for RetryType {
    fn from(b: bool) -> Self {
        RetryType::Decided(b)
    }
}

impl ErrorSource {
    /// for displaying the error source
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Upstream => "Upstream",
            Self::Downstream => "Downstream",
            Self::Internal => "Internal",
            Self::Unset => "",
        }
    }
}

/// Predefined type of errors
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ErrorType {
    // connect errors
    ConnectTimedout,
    ConnectRefused,
    ConnectNoRoute,
    TLSWantX509Lookup,
    TLSHandshakeFailure,
    TLSHandshakeTimedout,
    InvalidCert,
    HandshakeError, // other handshake
    ConnectError,   // catch all
    BindError,
    AcceptError,
    SocketError,
    ConnectProxyFailure,
    // protocol errors
    InvalidHTTPHeader,
    H1Error,     // catch all
    H2Error,     // catch all
    H2Downgrade, // Peer over h2 requests to downgrade to h1
    InvalidH2,   // Peer sends invalid h2 frames to us
    // IO error on established connections
    ReadError,
    WriteError,
    ReadTimedout,
    WriteTimedout,
    ConnectionClosed,
    // application error, will return HTTP status code
    HTTPStatus(u16),
    // file related
    FileOpenError,
    FileCreateError,
    FileReadError,
    FileWriteError,
    // other errors
    InternalError,
    // catch all
    UnknownError,
    /// Custom error with static string.
    /// this field is to allow users to extend the types of errors. If runtime generated string
    /// is needed, it is more likely to be treated as "context" rather than "type".
    Custom(&'static str),
    /// Custom error with static string and code.
    /// this field allows users to extend error further with error codes.
    CustomCode(&'static str, u16),
}

impl ErrorType {
    /// create a new type of error. Users should try to make `name` unique.
    pub const fn new(name: &'static str) -> Self {
        ErrorType::Custom(name)
    }

    /// create a new type of error. Users should try to make `name` unique.
    pub const fn new_code(name: &'static str, code: u16) -> Self {
        ErrorType::CustomCode(name, code)
    }

    /// for displaying the error type
    pub fn as_str(&self) -> &'static str {
        match self {
            ErrorType::ConnectTimedout => "ConnectTimedout",
            ErrorType::ConnectRefused => "ConnectRefused",
            ErrorType::ConnectNoRoute => "ConnectNoRoute",
            ErrorType::ConnectProxyFailure => "ConnectProxyFailure",
            ErrorType::TLSWantX509Lookup => "TLSWantX509Lookup",
            ErrorType::TLSHandshakeFailure => "TLSHandshakeFailure",
            ErrorType::TLSHandshakeTimedout => "TLSHandshakeTimedout",
            ErrorType::InvalidCert => "InvalidCert",
            ErrorType::HandshakeError => "HandshakeError",
            ErrorType::ConnectError => "ConnectError",
            ErrorType::BindError => "BindError",
            ErrorType::AcceptError => "AcceptError",
            ErrorType::SocketError => "SocketError",
            ErrorType::InvalidHTTPHeader => "InvalidHTTPHeader",
            ErrorType::H1Error => "H1Error",
            ErrorType::H2Error => "H2Error",
            ErrorType::InvalidH2 => "InvalidH2",
            ErrorType::H2Downgrade => "H2Downgrade",
            ErrorType::ReadError => "ReadError",
            ErrorType::WriteError => "WriteError",
            ErrorType::ReadTimedout => "ReadTimedout",
            ErrorType::WriteTimedout => "WriteTimedout",
            ErrorType::ConnectionClosed => "ConnectionClosed",
            ErrorType::FileOpenError => "FileOpenError",
            ErrorType::FileCreateError => "FileCreateError",
            ErrorType::FileReadError => "FileReadError",
            ErrorType::FileWriteError => "FileWriteError",
            ErrorType::HTTPStatus(_) => "HTTPStatus",
            ErrorType::InternalError => "InternalError",
            ErrorType::UnknownError => "UnknownError",
            ErrorType::Custom(s) => s,
            ErrorType::CustomCode(s, _) => s,
        }
    }
}

impl Error {
    /// Simply create the error. See other functions that provide less verbose interfaces.
    #[inline]
    pub fn create(
        etype: ErrorType,
        esource: ErrorSource,
        context: Option<ImmutStr>,
        cause: Option<Box<dyn ErrorTrait + Send + Sync>>,
    ) -> BError {
        let retry = if let Some(c) = cause.as_ref() {
            if let Some(e) = c.downcast_ref::<BError>() {
                e.retry
            } else {
                false.into()
            }
        } else {
            false.into()
        };
        Box::new(Error {
            etype,
            esource,
            retry,
            cause,
            context,
        })
    }

    #[inline]
    fn do_new(e: ErrorType, s: ErrorSource) -> BError {
        Self::create(e, s, None, None)
    }

    /// Create an error with the given type
    #[inline]
    pub fn new(e: ErrorType) -> BError {
        Self::do_new(e, ErrorSource::Unset)
    }

    /// Create an error with the given type, a context string and the causing error.
    /// This method is usually used when there the error is caused by another error.
    /// ```
    /// use pingora_error::{Error, ErrorType, Result};
    ///
    /// fn b() -> Result<()> {
    ///     // ...
    ///     Ok(())
    /// }
    /// fn do_something() -> Result<()> {
    ///     // a()?;
    ///     b().map_err(|e| Error::because(ErrorType::InternalError, "b failed after a", e))
    /// }
    /// ```
    /// Choose carefully between simply surfacing the causing error versus Because() here.
    /// Only use Because() when there is extra context that is not capture by
    /// the causing error itself.
    #[inline]
    pub fn because<S: Into<ImmutStr>, E: Into<Box<dyn ErrorTrait + Send + Sync>>>(
        e: ErrorType,
        context: S,
        cause: E,
    ) -> BError {
        Self::create(
            e,
            ErrorSource::Unset,
            Some(context.into()),
            Some(cause.into()),
        )
    }

    /// Short for Err(Self::because)
    #[inline]
    pub fn e_because<T, S: Into<ImmutStr>, E: Into<Box<dyn ErrorTrait + Send + Sync>>>(
        e: ErrorType,
        context: S,
        cause: E,
    ) -> Result<T> {
        Err(Self::because(e, context, cause))
    }

    /// Create an error with context but no direct causing error
    #[inline]
    pub fn explain<S: Into<ImmutStr>>(e: ErrorType, context: S) -> BError {
        Self::create(e, ErrorSource::Unset, Some(context.into()), None)
    }

    /// Short for Err(Self::explain)
    #[inline]
    pub fn e_explain<T, S: Into<ImmutStr>>(e: ErrorType, context: S) -> Result<T> {
        Err(Self::explain(e, context))
    }

    /// The new_{up, down, in} functions are to create new errors with source
    /// {upstream, downstream, internal}
    #[inline]
    pub fn new_up(e: ErrorType) -> BError {
        Self::do_new(e, ErrorSource::Upstream)
    }

    #[inline]
    pub fn new_down(e: ErrorType) -> BError {
        Self::do_new(e, ErrorSource::Downstream)
    }

    #[inline]
    pub fn new_in(e: ErrorType) -> BError {
        Self::do_new(e, ErrorSource::Internal)
    }

    /// Create a new custom error with the static string
    #[inline]
    pub fn new_str(s: &'static str) -> BError {
        Self::do_new(ErrorType::Custom(s), ErrorSource::Unset)
    }

    // the err_* functions are the same as new_* but return a Result<T>
    #[inline]
    pub fn err<T>(e: ErrorType) -> Result<T> {
        Err(Self::new(e))
    }

    #[inline]
    pub fn err_up<T>(e: ErrorType) -> Result<T> {
        Err(Self::new_up(e))
    }

    #[inline]
    pub fn err_down<T>(e: ErrorType) -> Result<T> {
        Err(Self::new_down(e))
    }

    #[inline]
    pub fn err_in<T>(e: ErrorType) -> Result<T> {
        Err(Self::new_in(e))
    }

    pub fn etype(&self) -> &ErrorType {
        &self.etype
    }

    pub fn esource(&self) -> &ErrorSource {
        &self.esource
    }

    pub fn retry(&self) -> bool {
        self.retry.retry()
    }

    pub fn set_retry(&mut self, retry: bool) {
        self.retry = retry.into();
    }

    pub fn reason_str(&self) -> &str {
        self.etype.as_str()
    }

    pub fn source_str(&self) -> &str {
        self.esource.as_str()
    }

    /// The as_{up, down, in} functions are to change the current errors with source
    /// {upstream, downstream, internal}
    pub fn as_up(&mut self) {
        self.esource = ErrorSource::Upstream;
    }

    pub fn as_down(&mut self) {
        self.esource = ErrorSource::Downstream;
    }

    pub fn as_in(&mut self) {
        self.esource = ErrorSource::Internal;
    }

    /// The into_{up, down, in} are the same as as_* but takes `self` and also return `self`
    pub fn into_up(mut self: BError) -> BError {
        self.as_up();
        self
    }

    pub fn into_down(mut self: BError) -> BError {
        self.as_down();
        self
    }

    pub fn into_in(mut self: BError) -> BError {
        self.as_in();
        self
    }

    pub fn into_err<T>(self: BError) -> Result<T> {
        Err(self)
    }

    pub fn set_cause<C: Into<Box<dyn ErrorTrait + Send + Sync>>>(&mut self, cause: C) {
        self.cause = Some(cause.into());
    }

    pub fn set_context<T: Into<ImmutStr>>(&mut self, context: T) {
        self.context = Some(context.into());
    }

    /// Create a new error from self, with the same type and source and put self as the cause
    /// ```
    /// use pingora_error::Result;
    ///
    ///  fn b() -> Result<()> {
    ///     // ...
    ///     Ok(())
    /// }
    ///
    /// fn do_something() -> Result<()> {
    ///     // a()?;
    ///     b().map_err(|e| e.more_context("b failed after a"))
    /// }
    /// ```
    /// This function is less verbose than `Because`. But it only work for [Error] while
    /// `Because` works for all types of errors who implement [std::error::Error] trait.
    pub fn more_context<T: Into<ImmutStr>>(self: BError, context: T) -> BError {
        let esource = self.esource.clone();
        let retry = self.retry;
        let mut e = Self::because(self.etype.clone(), context, self);
        e.esource = esource;
        e.retry = retry;
        e
    }

    // Display error but skip the duplicate elements from the error in previous hop
    fn chain_display(&self, previous: Option<&Error>, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if previous.map(|p| p.esource != self.esource).unwrap_or(true) {
            write!(f, "{}", self.esource.as_str())?
        }
        if previous.map(|p| p.etype != self.etype).unwrap_or(true) {
            write!(f, " {}", self.etype.as_str())?
        }

        if let Some(c) = self.context.as_ref() {
            write!(f, " context: {}", c)?;
        }
        if let Some(c) = self.cause.as_ref() {
            if let Some(e) = c.downcast_ref::<BError>() {
                write!(f, " cause: ")?;
                e.chain_display(Some(self), f)
            } else {
                write!(f, " cause: {}", c)
            }
        } else {
            Ok(())
        }
    }

    /// Return the ErrorType of the root Error
    pub fn root_etype(&self) -> &ErrorType {
        self.cause.as_ref().map_or(&self.etype, |c| {
            // Stop the recursion if the cause is not Error
            c.downcast_ref::<BError>()
                .map_or(&self.etype, |e| e.root_etype())
        })
    }

    pub fn root_cause(&self) -> &(dyn ErrorTrait + Send + Sync + 'static) {
        self.cause.as_deref().map_or(self, |c| {
            c.downcast_ref::<BError>().map_or(c, |e| e.root_cause())
        })
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        self.chain_display(None, f)
    }
}

impl ErrorTrait for Error {}

/// Helper trait to add more context to a given error
pub trait Context<T> {
    /// Wrap the `Err(E)` in [Result] with more context, the existing E will be the cause.
    ///
    /// This is a shortcut for map_err() + more_context()
    fn err_context<C: Into<ImmutStr>, F: FnOnce() -> C>(self, context: F) -> Result<T, BError>;
}

impl<T> Context<T> for Result<T, BError> {
    fn err_context<C: Into<ImmutStr>, F: FnOnce() -> C>(self, context: F) -> Result<T, BError> {
        self.map_err(|e| e.more_context(context()))
    }
}

/// Helper trait to chain errors with context
pub trait OrErr<T, E> {
    /// Wrap the E in [Result] with new [ErrorType] and context, the existing E will be the cause.
    ///
    /// This is a shortcut for map_err() + because()
    fn or_err(self, et: ErrorType, context: &'static str) -> Result<T, BError>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>;

    /// Similar to or_err(), but takes a closure, which is useful for constructing String.
    fn or_err_with<C: Into<ImmutStr>, F: FnOnce() -> C>(
        self,
        et: ErrorType,
        context: F,
    ) -> Result<T, BError>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>;

    /// Replace the E in [Result] with a new [Error] generated from the current error
    ///
    /// This is useful when the current error cannot move out of scope. This is a shortcut for map_err() + explain().
    fn explain_err<C: Into<ImmutStr>, F: FnOnce(E) -> C>(
        self,
        et: ErrorType,
        context: F,
    ) -> Result<T, BError>;

    /// Similar to or_err() but just to surface errors that are not [Error] (where `?` cannot be used directly).
    ///
    /// or_err()/or_err_with() are still preferred because they make the error more readable and traceable.
    fn or_fail(self) -> Result<T>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>;
}

impl<T, E> OrErr<T, E> for Result<T, E> {
    fn or_err(self, et: ErrorType, context: &'static str) -> Result<T, BError>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>,
    {
        self.map_err(|e| Error::because(et, context, e))
    }

    fn or_err_with<C: Into<ImmutStr>, F: FnOnce() -> C>(
        self,
        et: ErrorType,
        context: F,
    ) -> Result<T, BError>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>,
    {
        self.map_err(|e| Error::because(et, context(), e))
    }

    fn explain_err<C: Into<ImmutStr>, F: FnOnce(E) -> C>(
        self,
        et: ErrorType,
        exp: F,
    ) -> Result<T, BError> {
        self.map_err(|e| Error::explain(et, exp(e)))
    }

    fn or_fail(self) -> Result<T, BError>
    where
        E: Into<Box<dyn ErrorTrait + Send + Sync>>,
    {
        self.map_err(|e| Error::because(ErrorType::InternalError, "", e))
    }
}

/// Helper trait to convert an [Option] to an [Error] with context.
pub trait OkOrErr<T> {
    fn or_err(self, et: ErrorType, context: &'static str) -> Result<T, BError>;

    fn or_err_with<C: Into<ImmutStr>, F: FnOnce() -> C>(
        self,
        et: ErrorType,
        context: F,
    ) -> Result<T, BError>;
}

impl<T> OkOrErr<T> for Option<T> {
    /// Convert the [Option] to a new [Error] with [ErrorType] and context if None, Ok otherwise.
    ///
    /// This is a shortcut for .ok_or(Error::explain())
    fn or_err(self, et: ErrorType, context: &'static str) -> Result<T, BError> {
        self.ok_or(Error::explain(et, context))
    }

    /// Similar to to_err(), but takes a closure, which is useful for constructing String.
    fn or_err_with<C: Into<ImmutStr>, F: FnOnce() -> C>(
        self,
        et: ErrorType,
        context: F,
    ) -> Result<T, BError> {
        self.ok_or_else(|| Error::explain(et, context()))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_chain_of_error() {
        let e1 = Error::new(ErrorType::InternalError);
        let mut e2 = Error::new(ErrorType::HTTPStatus(400));
        e2.set_cause(e1);
        assert_eq!(format!("{}", e2), " HTTPStatus cause:  InternalError");
        assert_eq!(e2.root_etype().as_str(), "InternalError");

        let e3 = Error::new(ErrorType::InternalError);
        let e4 = Error::because(ErrorType::HTTPStatus(400), "test", e3);
        assert_eq!(
            format!("{}", e4),
            " HTTPStatus context: test cause:  InternalError"
        );
        assert_eq!(e4.root_etype().as_str(), "InternalError");
    }

    #[test]
    fn test_error_context() {
        let mut e1 = Error::new(ErrorType::InternalError);
        e1.set_context(format!("{} {}", "my", "context"));
        assert_eq!(format!("{}", e1), " InternalError context: my context");
    }

    #[test]
    fn test_context_trait() {
        let e1: Result<(), BError> = Err(Error::new(ErrorType::InternalError));
        let e2 = e1.err_context(|| "another");
        assert_eq!(
            format!("{}", e2.unwrap_err()),
            " InternalError context: another cause: "
        );
    }

    #[test]
    fn test_cause_trait() {
        let e1: Result<(), BError> = Err(Error::new(ErrorType::InternalError));
        let e2 = e1.or_err(ErrorType::HTTPStatus(400), "another");
        assert_eq!(
            format!("{}", e2.unwrap_err()),
            " HTTPStatus context: another cause:  InternalError"
        );
    }

    #[test]
    fn test_option_some_ok() {
        let m = Some(2);
        let o = m.or_err(ErrorType::InternalError, "some is not an error!");
        assert_eq!(2, o.unwrap());

        let o = m.or_err_with(ErrorType::InternalError, || "some is not an error!");
        assert_eq!(2, o.unwrap());
    }

    #[test]
    fn test_option_none_err() {
        let m: Option<i32> = None;
        let e1 = m.or_err(ErrorType::InternalError, "none is an error!");
        assert_eq!(
            format!("{}", e1.unwrap_err()),
            " InternalError context: none is an error!"
        );

        let e1 = m.or_err_with(ErrorType::InternalError, || "none is an error!");
        assert_eq!(
            format!("{}", e1.unwrap_err()),
            " InternalError context: none is an error!"
        );
    }

    #[test]
    fn test_into() {
        fn other_error() -> Result<(), &'static str> {
            Err("oops")
        }

        fn surface_err() -> Result<()> {
            other_error().or_fail()?; // can return directly but want to showcase ?
            Ok(())
        }

        let e = surface_err().unwrap_err();
        assert_eq!(format!("{}", e), " InternalError context:  cause: oops");
    }
}


================================================
FILE: pingora-header-serde/Cargo.toml
================================================
[package]
name = "pingora-header-serde"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["compression"]
keywords = ["http", "compression", "pingora"]
exclude = ["samples/*"]
description = """
HTTP header (de)serialization and compression for Pingora.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_header_serde"
path = "src/lib.rs"

[[bin]]
name = "trainer"
path = "src/trainer.rs"

[dependencies]
zstd = "0.13.1"
zstd-safe = { version = "7.1.0", features = ["std"] }
http = { workspace = true }
bytes = { workspace = true }
httparse = { workspace = true }
pingora-error = { version = "0.8.0", path = "../pingora-error" }
pingora-http = { version = "0.8.0", path = "../pingora-http" }
thread_local = "1.0"


================================================
FILE: pingora-header-serde/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-header-serde/samples/test/1
================================================
HTTP/1.1 200 OK
Server: nginx
Date: Wed, 22 Dec 2021 06:30:29 GMT
Content-Type: application/javascript
Last-Modified: Mon, 29 Nov 2021 10:13:32 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
ETag: W/"61a4a7cc-21df8"
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true
Access-Control-Expose-Headers: Content-Length,Content-Range
Access-Control-Allow-Headers: Range
Content-Encoding: gzip


================================================
FILE: pingora-header-serde/samples/test/2
================================================
HTTP/1.1 200 OK
Server: nginx
Date: Thu, 23 Dec 2021 15:12:32 GMT
Content-Type: application/javascript
Last-Modified: Mon, 09 Sep 2019 12:47:14 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
ETag: W/"5d7649d2-16ec64"
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true
Access-Control-Expose-Headers: Content-Length,Content-Range
Access-Control-Allow-Headers: Range
Content-Encoding: gzip


================================================
FILE: pingora-header-serde/samples/test/3
================================================
HTTP/1.1 200 OK
Server: nginx
Date: Wed, 22 Dec 2021 12:29:00 GMT
Content-Type: application/javascript
Last-Modified: Mon, 09 Sep 2019 07:47:37 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
ETag: W/"5d760399-52868"
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true
Access-Control-Expose-Headers: Content-Length,Content-Range
Access-Control-Allow-Headers: Range
Content-Encoding: gzip


================================================
FILE: pingora-header-serde/samples/test/4
================================================
HTTP/1.1 200 OK
Server: nginx
Date: Wed, 22 Dec 2021 06:11:09 GMT
Content-Type: application/javascript
Last-Modified: Mon, 20 Dec 2021 01:23:10 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
ETag: W/"61bfdafe-21bc4"
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true
Access-Control-Expose-Headers: Content-Length,Content-Range
Access-Control-Allow-Headers: Range
Content-Encoding: gzip


================================================
FILE: pingora-header-serde/samples/test/5
================================================
HTTP/1.1 200 OK
Server: nginx
Date: Thu, 23 Dec 2021 15:23:29 GMT
Content-Type: application/javascript
Last-Modified: Sat, 09 Oct 2021 23:41:34 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
ETag: W/"616228ae-52054"
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true
Access-Control-Expose-Headers: Content-Length,Content-Range
Access-Control-Allow-Headers: Range
Content-Encoding: gzip


================================================
FILE: pingora-header-serde/samples/test/6
================================================
HTTP/1.1 200 OK
Server: nginx
Date: Wed, 22 Dec 2021 06:30:29 GMT
Content-Type: application/javascript
Last-Modified: Mon, 29 Nov 2021 10:13:32 GMT
Transfer-Encoding: chunked
Connection: keep-alive
Vary: Accept-Encoding
ETag: W/"61a4a7cc-21df8"
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true
Access-Control-Expose-Headers: Content-Length,Content-Range
Access-Control-Allow-Headers: Range
Content-Encoding: gzip


================================================
FILE: pingora-header-serde/samples/test/7
================================================
HTTP/1.1 200 OK
server: nginx
date: Sat, 25 Dec 2021 03:05:35 GMT
content-type: application/javascript
last-modified: Fri, 24 Dec 2021 04:20:01 GMT
transfer-encoding: chunked
connection: keep-alive
vary: Accept-Encoding
etag: W/"61c54a71-2d590"
access-control-allow-origin: *
access-control-allow-credentials: true
access-control-expose-headers: Content-Length,Content-Range
access-control-allow-headers: Range
content-encoding: gzip


================================================
FILE: pingora-header-serde/src/dict.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Training to generate the zstd dictionary.

use std::fs;
use zstd::dict;

/// Train the zstd dictionary from all the files under the given `dir_path`
///
/// The output will be the trained dictionary
pub fn train<P: AsRef<std::path::Path>>(dir_path: P) -> Vec<u8> {
    // TODO: check f is file, it can be dir
    let files = fs::read_dir(dir_path)
        .unwrap()
        .filter_map(|entry| entry.ok().map(|f| f.path()));
    dict::from_files(files, 64 * 1024 * 1024).unwrap()
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::resp_header_to_buf;
    use pingora_http::ResponseHeader;

    fn gen_test_dict() -> Vec<u8> {
        let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        path.push("samples/test");
        train(path)
    }

    fn gen_test_header() -> ResponseHeader {
        let mut header = ResponseHeader::build(200, None).unwrap();
        header
            .append_header("Date", "Thu, 23 Dec 2021 11:23:29 GMT")
            .unwrap();
        header
            .append_header("Last-Modified", "Sat, 09 Oct 2021 22:41:34 GMT")
            .unwrap();
        header.append_header("Connection", "keep-alive").unwrap();
        header.append_header("Vary", "Accept-encoding").unwrap();
        header.append_header("Content-Encoding", "gzip").unwrap();
        header
            .append_header("Access-Control-Allow-Origin", "*")
            .unwrap();
        header
    }

    #[test]
    fn test_ser_with_dict() {
        let dict = gen_test_dict();
        let serde = crate::HeaderSerde::new(Some(dict));
        let serde_no_dict = crate::HeaderSerde::new(None);
        let header = gen_test_header();

        let compressed = serde.serialize(&header).unwrap();
        let compressed_no_dict = serde_no_dict.serialize(&header).unwrap();
        let mut buf = vec![];
        let uncompressed = resp_header_to_buf(&header, &mut buf);

        assert!(compressed.len() < uncompressed);
        assert!(compressed.len() < compressed_no_dict.len());
    }

    #[test]
    fn test_deserialize_with_dict() {
        let dict = gen_test_dict();
        let serde = crate::HeaderSerde::new(Some(dict));
        let serde_no_dict = crate::HeaderSerde::new(None);
        let header = gen_test_header();

        let compressed = serde.serialize(&header).unwrap();
        let compressed_no_dict = serde_no_dict.serialize(&header).unwrap();

        let from_dict_header = serde.deserialize(&compressed).unwrap();
        let from_no_dict_header = serde_no_dict.deserialize(&compressed_no_dict).unwrap();

        assert_eq!(from_dict_header.status, from_no_dict_header.status);
        assert_eq!(from_dict_header.headers, from_no_dict_header.headers);
    }

    #[test]
    fn test_ser_de_with_dict() {
        let dict = gen_test_dict();
        let serde = crate::HeaderSerde::new(Some(dict));
        let header = gen_test_header();

        let compressed = serde.serialize(&header).unwrap();
        let header2 = serde.deserialize(&compressed).unwrap();

        assert_eq!(header.status, header2.status);
        assert_eq!(header.headers, header2.headers);
    }
}


================================================
FILE: pingora-header-serde/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP Response header serialization with compression
//!
//! This crate is able to serialize http response header to about 1/3 of its original size (HTTP/1.1 wire format)
//! with trained dictionary.

#![warn(clippy::all)]
#![allow(clippy::new_without_default)]
#![allow(clippy::type_complexity)]

pub mod dict;
mod thread_zstd;

use bytes::BufMut;
use http::Version;
use pingora_error::{Error, ErrorType, ImmutStr, Result};
use pingora_http::ResponseHeader;
use std::cell::RefCell;
use std::ops::DerefMut;
use thread_local::ThreadLocal;

/// HTTP Response header serialization
///
/// This struct provides the APIs to convert HTTP response header into compressed wired format for
/// storage.
pub struct HeaderSerde {
    compression: ZstdCompression,
    // internal buffer for uncompressed data to be compressed and vice versa
    buf: ThreadLocal<RefCell<Vec<u8>>>,
}

const MAX_HEADER_BUF_SIZE: usize = 128 * 1024; // 128KB

const COMPRESS_LEVEL: i32 = 3;

impl HeaderSerde {
    /// Create a new [HeaderSerde]
    ///
    /// An optional zstd compression dictionary can be provided to improve the compression ratio
    /// and speed. See [dict] for more details.
    pub fn new(dict: Option<Vec<u8>>) -> Self {
        if let Some(dict) = dict {
            HeaderSerde {
                compression: ZstdCompression::WithDict(thread_zstd::CompressionWithDict::new(
                    &dict,
                    COMPRESS_LEVEL,
                )),
                buf: ThreadLocal::new(),
            }
        } else {
            HeaderSerde {
                compression: ZstdCompression::Default(
                    thread_zstd::Compression::new(),
                    COMPRESS_LEVEL,
                ),
                buf: ThreadLocal::new(),
            }
        }
    }

    /// Serialize the given response header
    pub fn serialize(&self, header: &ResponseHeader) -> Result<Vec<u8>> {
        // for now we use HTTP 1.1 wire format for that
        // TODO: should convert to h1 if the incoming header is for h2
        let mut buf = self
            .buf
            .get_or(|| RefCell::new(Vec::with_capacity(MAX_HEADER_BUF_SIZE)))
            .borrow_mut();
        buf.clear(); // reset the buf
        resp_header_to_buf(header, &mut buf);
        self.compression.compress(&buf)
    }

    /// Deserialize the given response header
    pub fn deserialize(&self, data: &[u8]) -> Result<ResponseHeader> {
        let mut buf = self
            .buf
            .get_or(|| RefCell::new(Vec::with_capacity(MAX_HEADER_BUF_SIZE)))
            .borrow_mut();
        buf.clear(); // reset the buf
        self.compression
            .decompress_to_buffer(data, buf.deref_mut())?;
        buf_to_http_header(&buf)
    }
}

// Wrapper type to unify compressing with and withuot a dictionary,
// since the two structs have different inputs for their APIs.
enum ZstdCompression {
    Default(thread_zstd::Compression, i32),
    WithDict(thread_zstd::CompressionWithDict),
}

#[inline]
fn into_error<S: Into<ImmutStr>>(e: &'static str, context: S) -> Box<Error> {
    Error::because(ErrorType::InternalError, context, e)
}

impl ZstdCompression {
    fn compress(&self, data: &[u8]) -> Result<Vec<u8>> {
        match &self {
            ZstdCompression::Default(c, level) => c
                .compress(data, *level)
                .map_err(|e| into_error(e, "compress header")),
            ZstdCompression::WithDict(c) => c
                .compress(data)
                .map_err(|e| into_error(e, "compress header")),
        }
    }

    fn decompress_to_buffer(&self, source: &[u8], destination: &mut Vec<u8>) -> Result<usize> {
        match &self {
            ZstdCompression::Default(c, _) => {
                c.decompress_to_buffer(source, destination).map_err(|e| {
                    into_error(
                        e,
                        format!(
                            "decompress header, frame_content_size: {}",
                            get_frame_content_size(source)
                        ),
                    )
                })
            }
            ZstdCompression::WithDict(c) => {
                c.decompress_to_buffer(source, destination).map_err(|e| {
                    into_error(
                        e,
                        format!(
                            "decompress header, frame_content_size: {}",
                            get_frame_content_size(source)
                        ),
                    )
                })
            }
        }
    }
}

#[inline]
fn get_frame_content_size(source: &[u8]) -> ImmutStr {
    match zstd_safe::get_frame_content_size(source) {
        Ok(Some(size)) => match size {
            zstd_safe::CONTENTSIZE_ERROR => ImmutStr::from("invalid"),
            zstd_safe::CONTENTSIZE_UNKNOWN => ImmutStr::from("unknown"),
            _ => ImmutStr::from(size.to_string()),
        },
        Ok(None) => ImmutStr::from("none"),
        Err(_e) => ImmutStr::from("failed"),
    }
}

const CRLF: &[u8; 2] = b"\r\n";

// Borrowed from pingora http1
#[inline]
fn resp_header_to_buf(resp: &ResponseHeader, buf: &mut Vec<u8>) -> usize {
    // Status-Line
    let version = match resp.version {
        Version::HTTP_10 => "HTTP/1.0 ",
        Version::HTTP_11 => "HTTP/1.1 ",
        _ => "HTTP/1.1 ", // store everything else (including h2) in http 1.1 format
    };
    buf.put_slice(version.as_bytes());
    let status = resp.status;
    buf.put_slice(status.as_str().as_bytes());
    buf.put_u8(b' ');
    let reason = status.canonical_reason();
    if let Some(reason_buf) = reason {
        buf.put_slice(reason_buf.as_bytes());
    }
    buf.put_slice(CRLF);

    // headers
    resp.header_to_h1_wire(buf);

    buf.put_slice(CRLF);

    buf.len()
}

// Should match pingora http1 setting
const MAX_HEADERS: usize = 256;

#[inline]
fn buf_to_http_header(buf: &[u8]) -> Result<ResponseHeader> {
    let mut headers = vec![httparse::EMPTY_HEADER; MAX_HEADERS];
    let mut resp = httparse::Response::new(&mut headers);

    match resp.parse(buf) {
        Ok(s) => match s {
            httparse::Status::Complete(_size) => parsed_to_header(&resp),
            // we always feed the but that contains the entire header to parse
            _ => Error::e_explain(ErrorType::InternalError, "incomplete uncompressed header"),
        },
        Err(e) => Error::e_because(
            ErrorType::InternalError,
            format!(
                "parsing failed on uncompressed header, len={}, content={:?}",
                buf.len(),
                String::from_utf8_lossy(buf)
            ),
            e,
        ),
    }
}

#[inline]
fn parsed_to_header(parsed: &httparse::Response) -> Result<ResponseHeader> {
    // code should always be there
    // TODO: allow reading the parsed http version?
    let mut resp = ResponseHeader::build(parsed.code.unwrap(), Some(parsed.headers.len()))?;

    for header in parsed.headers.iter() {
        resp.append_header(header.name.to_string(), header.value)?;
    }

    Ok(resp)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_ser_wo_dict() {
        let serde = HeaderSerde::new(None);
        let mut header = ResponseHeader::build(200, None).unwrap();
        header.append_header("foo", "bar").unwrap();
        header.append_header("foo", "barbar").unwrap();
        header.append_header("foo", "barbarbar").unwrap();
        header.append_header("Server", "Pingora").unwrap();

        let compressed = serde.serialize(&header).unwrap();
        let mut buf = vec![];
        let uncompressed = resp_header_to_buf(&header, &mut buf);
        assert!(compressed.len() < uncompressed);
    }

    #[test]
    fn test_ser_de_no_dict() {
        let serde = HeaderSerde::new(None);
        let mut header = ResponseHeader::build(200, None).unwrap();
        header.append_header("foo1", "bar1").unwrap();
        header.append_header("foo2", "barbar2").unwrap();
        header.append_header("foo3", "barbarbar3").unwrap();
        header.append_header("Server", "Pingora").unwrap();

        let compressed = serde.serialize(&header).unwrap();
        let header2 = serde.deserialize(&compressed).unwrap();
        assert_eq!(header.status, header2.status);
        assert_eq!(header.headers, header2.headers);
    }

    #[test]
    fn test_no_headers() {
        let serde = HeaderSerde::new(None);
        let header = ResponseHeader::build(200, None).unwrap(); // No headers added

        // Serialize and deserialize
        let compressed = serde.serialize(&header).unwrap();
        let header2 = serde.deserialize(&compressed).unwrap();

        assert_eq!(header.status, header2.status);
        assert_eq!(header.headers.len(), 0);
        assert_eq!(header2.headers.len(), 0);
    }

    #[test]
    fn test_empty_header_wire_format() {
        let header = ResponseHeader::build(200, None).unwrap();
        let mut buf = vec![];
        resp_header_to_buf(&header, &mut buf);

        // Should be: "HTTP/1.1 200 OK\r\n\r\n", total 19 bytes
        assert_eq!(buf.len(), 19);
        assert_eq!(buf, b"HTTP/1.1 200 OK\r\n\r\n");

        // Test that httparse can handle this
        let parsed = buf_to_http_header(&buf).unwrap();
        assert_eq!(parsed.status.as_u16(), 200);
    }
}


================================================
FILE: pingora-header-serde/src/thread_zstd.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::cell::{RefCell, RefMut};
use thread_local::ThreadLocal;
use zstd_safe::{CCtx, CDict, DCtx, DDict};

/// Each thread will own its compression and decompression CTXes, and they share a single dict
/// https://facebook.github.io/zstd/zstd_manual.html recommends to reuse ctx per thread

// Both `Compression` and `CompressionWithDict` are just wrappers around the inner compression and
// decompression contexts, but have different APIs to access it.

#[derive(Default)]
pub struct Compression(CompressionInner);

// these codes are inspired by zstd crate

impl Compression {
    pub fn new() -> Self {
        Compression(CompressionInner::new())
    }

    pub fn compress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
        level: i32,
    ) -> Result<usize, &'static str> {
        self.0.compress_to_buffer(source, destination, level)
    }

    pub fn compress(&self, data: &[u8], level: i32) -> Result<Vec<u8>, &'static str> {
        let mut buffer = make_compressed_data_buffer(data.len());
        self.compress_to_buffer(data, &mut buffer, level)?;
        Ok(buffer)
    }

    pub fn decompress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
    ) -> Result<usize, &'static str> {
        self.0.decompress_to_buffer(source, destination)
    }
}

pub struct CompressionWithDict {
    inner: CompressionInner,
    // these dictionaries are owned by this struct, hence the static lifetime
    com_dict: CDict<'static>,
    de_dict: DDict<'static>,
}

impl CompressionWithDict {
    pub fn new(dict: &[u8], compression_level: i32) -> Self {
        CompressionWithDict {
            inner: CompressionInner::new(),
            // compression dictionary needs to be loaded ahead of time
            // with the compression level
            com_dict: CDict::create(dict, compression_level),
            de_dict: DDict::create(dict),
        }
    }

    pub fn compress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
    ) -> Result<usize, &'static str> {
        self.inner
            .compress_to_buffer_using_dict(source, destination, &self.com_dict)
    }

    pub fn compress(&self, data: &[u8]) -> Result<Vec<u8>, &'static str> {
        let mut buffer = make_compressed_data_buffer(data.len());
        self.compress_to_buffer(data, &mut buffer)?;
        Ok(buffer)
    }

    pub fn decompress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
    ) -> Result<usize, &'static str> {
        self.inner
            .decompress_to_buffer_using_dict(source, destination, &self.de_dict)
    }
}

#[derive(Default)]
struct CompressionInner {
    com_context: ThreadLocal<RefCell<zstd_safe::CCtx<'static>>>,
    de_context: ThreadLocal<RefCell<zstd_safe::DCtx<'static>>>,
}

impl CompressionInner {
    fn new() -> Self {
        CompressionInner {
            com_context: ThreadLocal::new(),
            de_context: ThreadLocal::new(),
        }
    }

    #[inline]
    fn get_com_context(&self) -> RefMut<'_, CCtx<'static>> {
        self.com_context
            .get_or(|| RefCell::new(CCtx::create()))
            .borrow_mut()
    }

    #[inline]
    fn get_de_context(&self) -> RefMut<'_, DCtx<'static>> {
        self.de_context
            .get_or(|| RefCell::new(DCtx::create()))
            .borrow_mut()
    }

    fn compress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
        level: i32,
    ) -> Result<usize, &'static str> {
        self.get_com_context()
            .compress(destination, source, level)
            .map_err(zstd_safe::get_error_name)
    }

    fn decompress_to_buffer<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
    ) -> Result<usize, &'static str> {
        self.get_de_context()
            .decompress(destination, source)
            .map_err(zstd_safe::get_error_name)
    }

    fn compress_to_buffer_using_dict<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
        dict: &CDict,
    ) -> Result<usize, &'static str> {
        self.get_com_context()
            .compress_using_cdict(destination, source, dict)
            .map_err(zstd_safe::get_error_name)
    }

    pub fn decompress_to_buffer_using_dict<C: zstd_safe::WriteBuf + ?Sized>(
        &self,
        source: &[u8],
        destination: &mut C,
        dict: &DDict,
    ) -> Result<usize, &'static str> {
        self.get_de_context()
            .decompress_using_ddict(destination, source, dict)
            .map_err(zstd_safe::get_error_name)
    }
}

// Helper to create a buffer for the compressed data, preallocating enough
// for the compressed size (given the size of the uncompressed data).
#[inline]
fn make_compressed_data_buffer(uncompressed_len: usize) -> Vec<u8> {
    let buffer_len = zstd_safe::compress_bound(uncompressed_len);
    Vec::with_capacity(buffer_len)
}


================================================
FILE: pingora-header-serde/src/trainer.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use pingora_header_serde::dict::train;
use std::env;
use std::io::{self, Write};

pub fn main() {
    let args: Vec<String> = env::args().collect();
    let dict = train(&args[1]);
    io::stdout().write_all(&dict).unwrap();
}


================================================
FILE: pingora-http/Cargo.toml
================================================
[package]
name = "pingora-http"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["web-programming"]
keywords = ["http", "pingora"]
description = """
HTTP request and response header types for Pingora.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_http"
path = "src/lib.rs"

[dependencies]
http = { workspace = true }
bytes = { workspace = true }
pingora-error = { version = "0.8.0", path = "../pingora-error" }

[features]
default = []
patched_http1 = []


================================================
FILE: pingora-http/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-http/src/case_header_name.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::*;
use bytes::Bytes;
use http::header;

#[derive(Debug, Clone)]
pub struct CaseHeaderName(Bytes);

impl CaseHeaderName {
    pub fn new(name: String) -> Self {
        CaseHeaderName(name.into())
    }
}

impl CaseHeaderName {
    pub fn as_slice(&self) -> &[u8] {
        &self.0
    }

    pub fn from_slice(buf: &[u8]) -> Self {
        CaseHeaderName(Bytes::copy_from_slice(buf))
    }
}

/// A trait that converts into case-sensitive header names.
pub trait IntoCaseHeaderName {
    fn into_case_header_name(self) -> CaseHeaderName;
}

impl IntoCaseHeaderName for CaseHeaderName {
    fn into_case_header_name(self) -> CaseHeaderName {
        self
    }
}

impl IntoCaseHeaderName for String {
    fn into_case_header_name(self) -> CaseHeaderName {
        CaseHeaderName(self.into())
    }
}

impl IntoCaseHeaderName for &'static str {
    fn into_case_header_name(self) -> CaseHeaderName {
        CaseHeaderName(self.into())
    }
}

impl IntoCaseHeaderName for HeaderName {
    fn into_case_header_name(self) -> CaseHeaderName {
        CaseHeaderName(titled_header_name(&self))
    }
}

impl IntoCaseHeaderName for &HeaderName {
    fn into_case_header_name(self) -> CaseHeaderName {
        CaseHeaderName(titled_header_name(self))
    }
}

impl IntoCaseHeaderName for Bytes {
    fn into_case_header_name(self) -> CaseHeaderName {
        CaseHeaderName(self)
    }
}

fn titled_header_name(header_name: &HeaderName) -> Bytes {
    titled_header_name_str(header_name).map_or_else(
        || Bytes::copy_from_slice(header_name.as_str().as_bytes()),
        |s| Bytes::from_static(s.as_bytes()),
    )
}

pub(crate) fn titled_header_name_str(header_name: &HeaderName) -> Option<&'static str> {
    Some(match *header_name {
        header::ACCEPT_RANGES => "Accept-Ranges",
        header::AGE => "Age",
        header::CACHE_CONTROL => "Cache-Control",
        header::CONNECTION => "Connection",
        header::CONTENT_TYPE => "Content-Type",
        header::CONTENT_ENCODING => "Content-Encoding",
        header::CONTENT_LENGTH => "Content-Length",
        header::DATE => "Date",
        header::TRANSFER_ENCODING => "Transfer-Encoding",
        header::HOST => "Host",
        header::SERVER => "Server",
        header::SET_COOKIE => "Set-Cookie",
        // TODO: add more const header here to map to their titled case
        // TODO: automatically upper case the first letter?
        _ => {
            return None;
        }
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_case_header_name() {
        assert_eq!("FoO".into_case_header_name().as_slice(), b"FoO");
        assert_eq!("FoO".to_string().into_case_header_name().as_slice(), b"FoO");
        assert_eq!(header::SERVER.into_case_header_name().as_slice(), b"Server");
    }
}


================================================
FILE: pingora-http/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! HTTP header objects that preserve http header cases
//!
//! Although HTTP header names are supposed to be case-insensitive for compatibility, proxies
//! ideally shouldn't alter the HTTP traffic, especially the headers they don't need to read.
//!
//! This crate provide structs and methods to preserve the headers in order to build a transparent
//! proxy.

#![allow(clippy::new_without_default)]

use bytes::BufMut;
use http::header::{AsHeaderName, HeaderName, HeaderValue};
use http::request::Builder as ReqBuilder;
use http::request::Parts as ReqParts;
use http::response::Builder as RespBuilder;
use http::response::Parts as RespParts;
use http::uri::Uri;
use pingora_error::{ErrorType::*, OrErr, Result};
use std::ops::{Deref, DerefMut};

pub use http::method::Method;
pub use http::status::StatusCode;
pub use http::version::Version;
pub use http::HeaderMap as HMap;

mod case_header_name;
use case_header_name::CaseHeaderName;
pub use case_header_name::IntoCaseHeaderName;

pub mod prelude {
    pub use crate::RequestHeader;
    pub use crate::ResponseHeader;
}

/* an ordered header map to store the original case of each header name
HMap({
    "foo": ["Foo", "foO", "FoO"]
})
The order how HeaderMap iter over its items is "arbitrary, but consistent".
Hopefully this property makes sure this map of header names always iterates in the
same order of the map of header values.
This idea is inspaired by hyper @nox
*/
type CaseMap = HMap<CaseHeaderName>;

pub enum HeaderNameVariant<'a> {
    Case(&'a CaseHeaderName),
    Titled(&'a str),
}

/// The HTTP request header type.
///
/// This type is similar to [http::request::Parts] but preserves header name case.
/// It also preserves request path even if it is not UTF-8.
///
/// [RequestHeader] implements [Deref] for [http::request::Parts] so it can be used as it in most
/// places.
#[derive(Debug)]
pub struct RequestHeader {
    base: ReqParts,
    header_name_map: Option<CaseMap>,
    // store the raw path bytes only if it is invalid utf-8
    raw_path_fallback: Vec<u8>, // can also be Box<[u8]>
    // whether we send END_STREAM with HEADERS for h2 requests
    send_end_stream: bool,
}

impl AsRef<ReqParts> for RequestHeader {
    fn as_ref(&self) -> &ReqParts {
        &self.base
    }
}

impl Deref for RequestHeader {
    type Target = ReqParts;

    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

impl DerefMut for RequestHeader {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.base
    }
}

impl RequestHeader {
    fn new_no_case(size_hint: Option<usize>) -> Self {
        let mut base = ReqBuilder::new().body(()).unwrap().into_parts().0;
        base.headers.reserve(http_header_map_upper_bound(size_hint));
        RequestHeader {
            base,
            header_name_map: None,
            raw_path_fallback: vec![],
            send_end_stream: true,
        }
    }

    /// Create a new [RequestHeader] with the given method and path.
    ///
    /// The `path` can be non UTF-8.
    pub fn build(
        method: impl TryInto<Method>,
        path: &[u8],
        size_hint: Option<usize>,
    ) -> Result<Self> {
        let mut req = Self::build_no_case(method, path, size_hint)?;
        req.header_name_map = Some(CaseMap::with_capacity(http_header_map_upper_bound(
            size_hint,
        )));
        Ok(req)
    }

    /// Create a new [RequestHeader] with the given method and path without preserving header case.
    ///
    /// A [RequestHeader] created from this type is more space efficient than those from [Self::build()].
    ///
    /// Use this method if reading from or writing to HTTP/2 sessions where header case doesn't matter anyway.
    pub fn build_no_case(
        method: impl TryInto<Method>,
        path: &[u8],
        size_hint: Option<usize>,
    ) -> Result<Self> {
        let mut req = Self::new_no_case(size_hint);
        req.base.method = method
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid method")?;
        req.set_raw_path(path)?;
        Ok(req)
    }

    /// Append the header name and value to `self`.
    ///
    /// If there are already some headers under the same name, a new value will be added without
    /// any others being removed.
    pub fn append_header(
        &mut self,
        name: impl IntoCaseHeaderName,
        value: impl TryInto<HeaderValue>,
    ) -> Result<bool> {
        let header_value = value
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid value while append")?;
        append_header_value(
            self.header_name_map.as_mut(),
            &mut self.base.headers,
            name,
            header_value,
        )
    }

    /// Insert the header name and value to `self`.
    ///
    /// Different from [Self::append_header()], this method will replace all other existing headers
    /// under the same name (case-insensitive).
    pub fn insert_header(
        &mut self,
        name: impl IntoCaseHeaderName,
        value: impl TryInto<HeaderValue>,
    ) -> Result<()> {
        let header_value = value
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid value while insert")?;
        insert_header_value(
            self.header_name_map.as_mut(),
            &mut self.base.headers,
            name,
            header_value,
        )
    }

    /// Remove all headers under the name
    pub fn remove_header<'a, N: ?Sized>(&mut self, name: &'a N) -> Option<HeaderValue>
    where
        &'a N: 'a + AsHeaderName,
    {
        remove_header(self.header_name_map.as_mut(), &mut self.base.headers, name)
    }

    /// Write the header to the `buf` in HTTP/1.1 wire format.
    ///
    /// The header case will be preserved.
    pub fn header_to_h1_wire(&self, buf: &mut impl BufMut) {
        header_to_h1_wire(self.header_name_map.as_ref(), &self.base.headers, buf)
    }

    /// If case sensitivity is enabled, returns an iterator to iterate over case-sensitive header names and values.
    /// Otherwise returns an empty iterator.
    ///
    /// Headers of the same name are visited in insertion order.
    pub fn case_header_iter(&self) -> impl Iterator<Item = (&CaseHeaderName, &HeaderValue)> + '_ {
        case_header_iter(self.header_name_map.as_ref(), &self.base.headers)
    }

    /// Returns true if the request has case-sensitive headers.
    pub fn has_case(&self) -> bool {
        self.header_name_map.is_some()
    }

    pub fn map<F: FnMut(HeaderNameVariant, &HeaderValue) -> Result<()>>(
        &self,
        mut f: F,
    ) -> Result<()> {
        let key_map = self.header_name_map.as_ref();
        let value_map = &self.base.headers;

        if let Some(key_map) = key_map {
            let iter = key_map.iter().zip(value_map.iter());
            for ((header, case_header), (header2, val)) in iter {
                if header != header2 {
                    // in case the header iteration order changes in future versions of HMap
                    panic!("header iter mismatch {}, {}", header, header2)
                }
                f(HeaderNameVariant::Case(case_header), val)?;
            }
        } else {
            for (header, value) in value_map {
                let titled_header =
                    case_header_name::titled_header_name_str(header).unwrap_or(header.as_str());
                f(HeaderNameVariant::Titled(titled_header), value)?;
            }
        }

        Ok(())
    }

    /// Set the request method
    pub fn set_method(&mut self, method: Method) {
        self.base.method = method;
    }

    /// Set the request URI
    pub fn set_uri(&mut self, uri: http::Uri) {
        self.base.uri = uri;
        // Clear out raw_path_fallback, or it will be used when serializing
        self.raw_path_fallback = vec![];
    }

    /// Set the request URI directly via raw bytes.
    ///
    /// Generally prefer [Self::set_uri()] to modify the header's URI if able.
    ///
    /// This API is to allow supporting non UTF-8 cases.
    pub fn set_raw_path(&mut self, path: &[u8]) -> Result<()> {
        if let Ok(p) = std::str::from_utf8(path) {
            let uri = Uri::builder()
                .path_and_query(p)
                .build()
                .explain_err(InvalidHTTPHeader, |_| format!("invalid uri {}", p))?;
            self.base.uri = uri;
            // keep raw_path empty, no need to store twice
        } else {
            // put a valid utf-8 path into base for read only access
            let lossy_str = String::from_utf8_lossy(path);
            let uri = Uri::builder()
                .path_and_query(lossy_str.as_ref())
                .build()
                .explain_err(InvalidHTTPHeader, |_| format!("invalid uri {}", lossy_str))?;
            self.base.uri = uri;
            self.raw_path_fallback = path.to_vec();
        }
        Ok(())
    }

    /// Set whether we send an END_STREAM on H2 request HEADERS if body is empty.
    pub fn set_send_end_stream(&mut self, send_end_stream: bool) {
        self.send_end_stream = send_end_stream;
    }

    /// Returns if we support sending an END_STREAM on H2 request HEADERS if body is empty,
    /// returns None if not H2.
    pub fn send_end_stream(&self) -> Option<bool> {
        if self.base.version != Version::HTTP_2 {
            return None;
        }
        Some(self.send_end_stream)
    }

    /// Return the request path in its raw format
    ///
    /// Non-UTF8 is supported.
    pub fn raw_path(&self) -> &[u8] {
        if !self.raw_path_fallback.is_empty() {
            &self.raw_path_fallback
        } else {
            // Url should always be set
            self.base
                .uri
                .path_and_query()
                .as_ref()
                .unwrap()
                .as_str()
                .as_bytes()
        }
    }

    /// Return the file extension of the path
    pub fn uri_file_extension(&self) -> Option<&str> {
        // get everything after the last '.' in path
        let (_, ext) = self
            .uri
            .path_and_query()
            .and_then(|pq| pq.path().rsplit_once('.'))?;
        Some(ext)
    }

    /// Set http version
    pub fn set_version(&mut self, version: Version) {
        self.base.version = version;
    }

    /// Clone `self` into [http::request::Parts].
    pub fn as_owned_parts(&self) -> ReqParts {
        clone_req_parts(&self.base)
    }
}

impl Clone for RequestHeader {
    fn clone(&self) -> Self {
        Self {
            base: self.as_owned_parts(),
            header_name_map: self.header_name_map.clone(),
            raw_path_fallback: self.raw_path_fallback.clone(),
            send_end_stream: self.send_end_stream,
        }
    }
}

// The `RequestHeader` will be the no case variant, because `ReqParts` keeps no header case
impl From<ReqParts> for RequestHeader {
    fn from(parts: ReqParts) -> RequestHeader {
        Self {
            base: parts,
            header_name_map: None,
            // no illegal path
            raw_path_fallback: vec![],
            send_end_stream: true,
        }
    }
}

impl From<RequestHeader> for ReqParts {
    fn from(resp: RequestHeader) -> ReqParts {
        resp.base
    }
}

/// The HTTP response header type.
///
/// This type is similar to [http::response::Parts] but preserves header name case.
/// [ResponseHeader] implements [Deref] for [http::response::Parts] so it can be used as it in most
/// places.
#[derive(Debug)]
pub struct ResponseHeader {
    base: RespParts,
    // an ordered header map to store the original case of each header name
    header_name_map: Option<CaseMap>,
    // the reason phrase of the response, if unset, a default one will be used
    reason_phrase: Option<String>,
}

impl AsRef<RespParts> for ResponseHeader {
    fn as_ref(&self) -> &RespParts {
        &self.base
    }
}

impl Deref for ResponseHeader {
    type Target = RespParts;

    fn deref(&self) -> &Self::Target {
        &self.base
    }
}

impl DerefMut for ResponseHeader {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.base
    }
}

impl Clone for ResponseHeader {
    fn clone(&self) -> Self {
        Self {
            base: self.as_owned_parts(),
            header_name_map: self.header_name_map.clone(),
            reason_phrase: self.reason_phrase.clone(),
        }
    }
}

// The `ResponseHeader` will be the no case variant, because `RespParts` keeps no header case
impl From<RespParts> for ResponseHeader {
    fn from(parts: RespParts) -> ResponseHeader {
        Self {
            base: parts,
            header_name_map: None,
            reason_phrase: None,
        }
    }
}

impl From<ResponseHeader> for RespParts {
    fn from(resp: ResponseHeader) -> RespParts {
        resp.base
    }
}

impl From<Box<ResponseHeader>> for Box<RespParts> {
    fn from(resp: Box<ResponseHeader>) -> Box<RespParts> {
        Box::new(resp.base)
    }
}

impl ResponseHeader {
    fn new(size_hint: Option<usize>) -> Self {
        let mut resp_header = Self::new_no_case(size_hint);
        resp_header.header_name_map = Some(CaseMap::with_capacity(http_header_map_upper_bound(
            size_hint,
        )));
        resp_header
    }

    fn new_no_case(size_hint: Option<usize>) -> Self {
        let mut base = RespBuilder::new().body(()).unwrap().into_parts().0;
        base.headers.reserve(http_header_map_upper_bound(size_hint));
        ResponseHeader {
            base,
            header_name_map: None,
            reason_phrase: None,
        }
    }

    /// Create a new [ResponseHeader] with the given status code.
    pub fn build(code: impl TryInto<StatusCode>, size_hint: Option<usize>) -> Result<Self> {
        let mut resp = Self::new(size_hint);
        resp.base.status = code
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid status")?;
        Ok(resp)
    }

    /// Create a new [ResponseHeader] with the given status code without preserving header case.
    ///
    /// A [ResponseHeader] created from this type is more space efficient than those from [Self::build()].
    ///
    /// Use this method if reading from or writing to HTTP/2 sessions where header case doesn't matter anyway.
    pub fn build_no_case(code: impl TryInto<StatusCode>, size_hint: Option<usize>) -> Result<Self> {
        let mut resp = Self::new_no_case(size_hint);
        resp.base.status = code
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid status")?;
        Ok(resp)
    }

    /// Append the header name and value to `self`.
    ///
    /// If there are already some headers under the same name, a new value will be added without
    /// any others being removed.
    pub fn append_header(
        &mut self,
        name: impl IntoCaseHeaderName,
        value: impl TryInto<HeaderValue>,
    ) -> Result<bool> {
        let header_value = value
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid value while append")?;
        append_header_value(
            self.header_name_map.as_mut(),
            &mut self.base.headers,
            name,
            header_value,
        )
    }

    /// Insert the header name and value to `self`.
    ///
    /// Different from [Self::append_header()], this method will replace all other existing headers
    /// under the same name (case insensitive).
    pub fn insert_header(
        &mut self,
        name: impl IntoCaseHeaderName,
        value: impl TryInto<HeaderValue>,
    ) -> Result<()> {
        let header_value = value
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid value while insert")?;
        insert_header_value(
            self.header_name_map.as_mut(),
            &mut self.base.headers,
            name,
            header_value,
        )
    }

    /// Remove all headers under the name
    pub fn remove_header<'a, N: ?Sized>(&mut self, name: &'a N) -> Option<HeaderValue>
    where
        &'a N: 'a + AsHeaderName,
    {
        remove_header(self.header_name_map.as_mut(), &mut self.base.headers, name)
    }

    /// Write the header to the `buf` in HTTP/1.1 wire format.
    ///
    /// The header case will be preserved.
    pub fn header_to_h1_wire(&self, buf: &mut impl BufMut) {
        header_to_h1_wire(self.header_name_map.as_ref(), &self.base.headers, buf)
    }

    /// If case sensitivity is enabled, returns an iterator to iterate over case-sensitive header names and values.
    /// Otherwise returns an empty iterator.
    ///
    /// Headers of the same name are visited in insertion order.
    pub fn case_header_iter(&self) -> impl Iterator<Item = (&CaseHeaderName, &HeaderValue)> + '_ {
        case_header_iter(self.header_name_map.as_ref(), &self.base.headers)
    }

    /// Returns true if the response has case-sensitive headers.
    pub fn has_case(&self) -> bool {
        self.header_name_map.is_some()
    }

    pub fn map<F: FnMut(HeaderNameVariant, &HeaderValue) -> Result<()>>(
        &self,
        mut f: F,
    ) -> Result<()> {
        let key_map = self.header_name_map.as_ref();
        let value_map = &self.base.headers;

        if let Some(key_map) = key_map {
            let iter = key_map.iter().zip(value_map.iter());
            for ((header, case_header), (header2, val)) in iter {
                if header != header2 {
                    // in case the header iteration order changes in future versions of HMap
                    panic!("header iter mismatch {}, {}", header, header2)
                }
                f(HeaderNameVariant::Case(case_header), val)?;
            }
        } else {
            for (header, value) in value_map {
                let titled_header =
                    case_header_name::titled_header_name_str(header).unwrap_or(header.as_str());
                f(HeaderNameVariant::Titled(titled_header), value)?;
            }
        }

        Ok(())
    }

    /// Set the status code
    pub fn set_status(&mut self, status: impl TryInto<StatusCode>) -> Result<()> {
        self.base.status = status
            .try_into()
            .explain_err(InvalidHTTPHeader, |_| "invalid status")?;
        Ok(())
    }

    /// Set the HTTP version
    pub fn set_version(&mut self, version: Version) {
        self.base.version = version
    }

    /// Set the HTTP reason phase. If `None`, a default reason phase will be used
    pub fn set_reason_phrase(&mut self, reason_phrase: Option<&str>) -> Result<()> {
        // No need to allocate memory to store the phrase if it is the default one.
        if reason_phrase == self.base.status.canonical_reason() {
            self.reason_phrase = None;
            return Ok(());
        }

        // TODO: validate it "*( HTAB / SP / VCHAR / obs-text )"
        self.reason_phrase = reason_phrase.map(str::to_string);
        Ok(())
    }

    /// Get the HTTP reason phase. If [Self::set_reason_phrase()] is never called
    /// or set to `None`, a default reason phase will be used
    pub fn get_reason_phrase(&self) -> Option<&str> {
        self.reason_phrase
            .as_deref()
            .or_else(|| self.base.status.canonical_reason())
    }

    /// Clone `self` into [http::response::Parts].
    pub fn as_owned_parts(&self) -> RespParts {
        clone_resp_parts(&self.base)
    }

    /// Helper function to set the HTTP content length on the response header.
    pub fn set_content_length(&mut self, len: usize) -> Result<()> {
        self.insert_header(http::header::CONTENT_LENGTH, len)
    }
}

fn clone_req_parts(me: &ReqParts) -> ReqParts {
    let mut parts = ReqBuilder::new()
        .method(me.method.clone())
        .uri(me.uri.clone())
        .version(me.version)
        .body(())
        .unwrap()
        .into_parts()
        .0;
    parts.headers = me.headers.clone();
    parts.extensions = me.extensions.clone();
    parts
}

fn clone_resp_parts(me: &RespParts) -> RespParts {
    let mut parts = RespBuilder::new()
        .status(me.status)
        .version(me.version)
        .body(())
        .unwrap()
        .into_parts()
        .0;
    parts.headers = me.headers.clone();
    parts.extensions = me.extensions.clone();
    parts
}

// This function returns an upper bound on the size of the header map used inside the http crate.
// As of version 0.2, there is a limit of 1 << 15 (32,768) items inside the map. There is an
// assertion against this size inside the crate, so we want to avoid panicking by not exceeding this
// upper bound.
fn http_header_map_upper_bound(size_hint: Option<usize>) -> usize {
    // Even though the crate has 1 << 15 as the max size, calls to `with_capacity` invoke a
    // function that returns the size + size / 3.
    //
    // See https://github.com/hyperium/http/blob/34a9d6bdab027948d6dea3b36d994f9cbaf96f75/src/header/map.rs#L3220
    //
    // Therefore we set our max size to be even lower, so we guarantee ourselves we won't hit that
    // upper bound in the crate. Any way you cut it, 4,096 headers is insane.
    const PINGORA_MAX_HEADER_COUNT: usize = 4096;
    const INIT_HEADER_SIZE: usize = 8;

    // We select the size hint or the max size here, ensuring that we pick a value substantially lower
    // than 1 << 15 with room to grow the header map.
    std::cmp::min(
        size_hint.unwrap_or(INIT_HEADER_SIZE),
        PINGORA_MAX_HEADER_COUNT,
    )
}

#[inline]
fn append_header_value<T>(
    name_map: Option<&mut CaseMap>,
    value_map: &mut HMap<T>,
    name: impl IntoCaseHeaderName,
    value: T,
) -> Result<bool> {
    let case_header_name = name.into_case_header_name();
    let header_name: HeaderName = case_header_name
        .as_slice()
        .try_into()
        .or_err(InvalidHTTPHeader, "invalid header name")?;
    // store the original case in the map
    if let Some(name_map) = name_map {
        name_map.append(header_name.clone(), case_header_name);
    }

    Ok(value_map.append(header_name, value))
}

#[inline]
fn insert_header_value<T>(
    name_map: Option<&mut CaseMap>,
    value_map: &mut HMap<T>,
    name: impl IntoCaseHeaderName,
    value: T,
) -> Result<()> {
    let case_header_name = name.into_case_header_name();
    let header_name: HeaderName = case_header_name
        .as_slice()
        .try_into()
        .or_err(InvalidHTTPHeader, "invalid header name")?;
    if let Some(name_map) = name_map {
        // store the original case in the map
        name_map.insert(header_name.clone(), case_header_name);
    }
    value_map.insert(header_name, value);
    Ok(())
}

// the &N here is to avoid clone(). None Copy type like String can impl AsHeaderName
#[inline]
fn remove_header<'a, T, N: ?Sized>(
    name_map: Option<&mut CaseMap>,
    value_map: &mut HMap<T>,
    name: &'a N,
) -> Option<T>
where
    &'a N: 'a + AsHeaderName,
{
    let removed = value_map.remove(name);
    if removed.is_some() {
        if let Some(name_map) = name_map {
            name_map.remove(name);
        }
    }
    removed
}

#[inline]
fn header_to_h1_wire(key_map: Option<&CaseMap>, value_map: &HMap, buf: &mut impl BufMut) {
    const CRLF: &[u8; 2] = b"\r\n";
    const HEADER_KV_DELIMITER: &[u8; 2] = b": ";

    if let Some(key_map) = key_map {
        case_header_iter(key_map.into(), value_map).for_each(|(case_header, val)| {
            buf.put_slice(case_header.as_slice());
            buf.put_slice(HEADER_KV_DELIMITER);
            buf.put_slice(val.as_ref());
            buf.put_slice(CRLF);
        });
    } else {
        for (header, value) in value_map {
            let titled_header =
                case_header_name::titled_header_name_str(header).unwrap_or(header.as_str());
            buf.put_slice(titled_header.as_bytes());
            buf.put_slice(HEADER_KV_DELIMITER);
            buf.put_slice(value.as_ref());
            buf.put_slice(CRLF);
        }
    }
}

#[inline]
fn case_header_iter<'a>(
    name_map: Option<&'a CaseMap>,
    value_map: &'a HMap,
) -> impl Iterator<Item = (&'a CaseHeaderName, &'a HeaderValue)> + 'a {
    name_map.into_iter().flat_map(|name_map| {
        name_map
            .iter()
            .zip(value_map.iter())
            .map(|((h1, name), (h2, value))| {
                // in case the header iteration order changes in future versions of HMap
                assert_eq!(h1, h2, "header iter mismatch {}, {}", h1, h2);
                (name, value)
            })
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn header_map_upper_bound() {
        assert_eq!(8, http_header_map_upper_bound(None));
        assert_eq!(16, http_header_map_upper_bound(Some(16)));
        assert_eq!(4096, http_header_map_upper_bound(Some(7777)));
    }

    #[test]
    fn test_single_header() {
        let mut req = RequestHeader::build("GET", b"\\", None).unwrap();
        req.insert_header("foo", "bar").unwrap();
        req.insert_header("FoO", "Bar").unwrap();
        let mut buf: Vec<u8> = vec![];
        req.header_to_h1_wire(&mut buf);
        assert_eq!(buf, b"FoO: Bar\r\n");
        req.case_header_iter().enumerate().for_each(|(i, (k, v))| {
            let name = String::from_utf8_lossy(k.as_slice()).into_owned();
            let value = String::from_utf8_lossy(v.as_ref()).into_owned();
            match i + 1 {
                1 => {
                    assert_eq!(name, "FoO");
                    assert_eq!(value, "Bar");
                }
                _ => panic!("too many headers"),
            }
        });

        let mut resp = ResponseHeader::new(None);
        resp.insert_header("foo", "bar").unwrap();
        resp.insert_header("FoO", "Bar").unwrap();
        let mut buf: Vec<u8> = vec![];
        resp.header_to_h1_wire(&mut buf);
        assert_eq!(buf, b"FoO: Bar\r\n");
        resp.case_header_iter().enumerate().for_each(|(i, (k, v))| {
            let name = String::from_utf8_lossy(k.as_slice()).into_owned();
            let value = String::from_utf8_lossy(v.as_ref()).into_owned();
            match i + 1 {
                1 => {
                    assert_eq!(name, "FoO");
                    assert_eq!(value, "Bar");
                }
                _ => panic!("too many headers"),
            }
        });
    }

    #[test]
    fn test_single_header_no_case() {
        let mut req = RequestHeader::new_no_case(None);
        req.insert_header("foo", "bar").unwrap();
        req.insert_header("FoO", "Bar").unwrap();
        let mut buf: Vec<u8> = vec![];
        req.header_to_h1_wire(&mut buf);
        assert_eq!(buf, b"foo: Bar\r\n");
        req.case_header_iter().for_each(|(_, _)| {
            unreachable!("request has no case");
        });

        let mut resp = ResponseHeader::new_no_case(None);
        resp.insert_header("foo", "bar").unwrap();
        resp.insert_header("FoO", "Bar").unwrap();
        let mut buf: Vec<u8> = vec![];
        resp.header_to_h1_wire(&mut buf);
        assert_eq!(buf, b"foo: Bar\r\n");
        resp.case_header_iter().for_each(|(_, _)| {
            unreachable!("response has no case");
        });
    }

    #[test]
    fn test_multiple_header() {
        let mut req = RequestHeader::build("GET", b"\\", None).unwrap();
        req.append_header("FoO", "Bar").unwrap();
        req.append_header("fOO", "bar").unwrap();
        req.append_header("BAZ", "baR").unwrap();
        req.append_header(http::header::CONTENT_LENGTH, "0")
            .unwrap();
        req.append_header("a", "b").unwrap();
        req.remove_header("a");
        let mut buf: Vec<u8> = vec![];
        req.header_to_h1_wire(&mut buf);
        assert_eq!(
            buf,
            b"FoO: Bar\r\nfOO: bar\r\nBAZ: baR\r\nContent-Length: 0\r\n"
        );
        req.case_header_iter().enumerate().for_each(|(i, (k, v))| {
            let name = String::from_utf8_lossy(k.as_slice()).into_owned();
            let value = String::from_utf8_lossy(v.as_ref()).into_owned();
            match i + 1 {
                1 => {
                    assert_eq!(name, "FoO");
                    assert_eq!(value, "Bar");
                }
                2 => {
                    assert_eq!(name, "fOO");
                    assert_eq!(value, "bar");
                }
                3 => {
                    assert_eq!(name, "BAZ");
                    assert_eq!(value, "baR");
                }
                4 => {
                    assert_eq!(name, "Content-Length");
                    assert_eq!(value, "0");
                }
                _ => panic!("too many headers"),
            }
        });

        let mut resp = ResponseHeader::new(None);
        resp.append_header("FoO", "Bar").unwrap();
        resp.append_header("fOO", "bar").unwrap();
        resp.append_header("BAZ", "baR").unwrap();
        resp.append_header(http::header::CONTENT_LENGTH, "0")
            .unwrap();
        resp.append_header("a", "b").unwrap();
        resp.remove_header("a");
        let mut buf: Vec<u8> = vec![];
        resp.header_to_h1_wire(&mut buf);
        assert_eq!(
            buf,
            b"FoO: Bar\r\nfOO: bar\r\nBAZ: baR\r\nContent-Length: 0\r\n"
        );
        resp.case_header_iter().enumerate().for_each(|(i, (k, v))| {
            let name = String::from_utf8_lossy(k.as_slice()).into_owned();
            let value = String::from_utf8_lossy(v.as_ref()).into_owned();
            match i + 1 {
                1 => {
                    assert_eq!(name, "FoO");
                    assert_eq!(value, "Bar");
                }
                2 => {
                    assert_eq!(name, "fOO");
                    assert_eq!(value, "bar");
                }
                3 => {
                    assert_eq!(name, "BAZ");
                    assert_eq!(value, "baR");
                }
                4 => {
                    assert_eq!(name, "Content-Length");
                    assert_eq!(value, "0");
                }
                _ => panic!("too many headers"),
            }
        });
    }

    #[cfg(feature = "patched_http1")]
    #[test]
    fn test_invalid_path() {
        let raw_path = b"Hello\xF0\x90\x80World";
        let req = RequestHeader::build("GET", &raw_path[..], None).unwrap();
        assert_eq!("Hello�World", req.uri.path_and_query().unwrap());
        assert_eq!(raw_path, req.raw_path());
    }

    #[cfg(feature = "patched_http1")]
    #[test]
    fn test_override_invalid_path() {
        let raw_path = b"Hello\xF0\x90\x80World";
        let mut req = RequestHeader::build("GET", &raw_path[..], None).unwrap();
        assert_eq!("Hello�World", req.uri.path_and_query().unwrap());
        assert_eq!(raw_path, req.raw_path());

        let new_path = "/HelloWorld";
        req.set_uri(Uri::builder().path_and_query(new_path).build().unwrap());
        assert_eq!(new_path, req.uri.path_and_query().unwrap());
        assert_eq!(new_path.as_bytes(), req.raw_path());
    }

    #[test]
    fn test_reason_phrase() {
        let mut resp = ResponseHeader::new(None);
        let reason = resp.get_reason_phrase().unwrap();
        assert_eq!(reason, "OK");

        resp.set_reason_phrase(Some("FooBar")).unwrap();
        let reason = resp.get_reason_phrase().unwrap();
        assert_eq!(reason, "FooBar");

        resp.set_reason_phrase(Some("OK")).unwrap();
        let reason = resp.get_reason_phrase().unwrap();
        assert_eq!(reason, "OK");

        resp.set_reason_phrase(None).unwrap();
        let reason = resp.get_reason_phrase().unwrap();
        assert_eq!(reason, "OK");
    }

    #[test]
    fn set_test_send_end_stream() {
        let mut req = RequestHeader::build("GET", b"/", None).unwrap();
        req.set_send_end_stream(true);

        // None for requests that are not h2
        assert!(req.send_end_stream().is_none());

        let mut req = RequestHeader::build("GET", b"/", None).unwrap();
        req.set_version(Version::HTTP_2);

        // Some(true) by default for h2
        assert!(req.send_end_stream().unwrap());

        req.set_send_end_stream(false);
        // Some(false)
        assert!(!req.send_end_stream().unwrap());
    }

    #[test]
    fn set_test_set_content_length() {
        let mut resp = ResponseHeader::new(None);
        resp.set_content_length(10).unwrap();

        assert_eq!(
            b"10",
            resp.headers
                .get(http::header::CONTENT_LENGTH)
                .map(|d| d.as_bytes())
                .unwrap()
        );
    }
}


================================================
FILE: pingora-ketama/Cargo.toml
================================================
[package]
name = "pingora-ketama"
version = "0.8.0"
description = "Rust port of the nginx consistent hash function"
authors = ["Pingora Team <pingora@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["caching", "algorithms"]
keywords = ["hash", "hashing", "consistent", "pingora"]

[dependencies]
crc32fast = "1.3"
i_key_sort = { version = "0.10.1", optional = true, features = ["allow_multithreading"] }

[dev-dependencies]
criterion = "0.7"
csv = "1.2"
dhat = "0.3"
env_logger = "0.11"
log = { workspace = true }
rand = "0.9.2"

[[bench]]
name = "simple"
harness = false

[[bench]]
name = "memory"
harness = false

[features]
heap-prof = []
v2 = ["i_key_sort"]


================================================
FILE: pingora-ketama/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-ketama/benches/memory.rs
================================================
use pingora_ketama::{Bucket, Continuum};

#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

fn buckets() -> Vec<Bucket> {
    let mut b = Vec::new();

    for i in 1..254 {
        b.push(Bucket::new(
            format!("127.0.0.{i}:6443").parse().unwrap(),
            10,
        ));
    }

    b
}

pub fn main() {
    let _profiler = dhat::Profiler::new_heap();
    let _c = Continuum::new(&buckets());
}


================================================
FILE: pingora-ketama/benches/simple.rs
================================================
use pingora_ketama::{Bucket, Continuum};

use criterion::{criterion_group, criterion_main, Criterion};
use rand::{
    distr::{Alphanumeric, SampleString},
    rng,
};

#[cfg(feature = "heap-prof")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

fn buckets() -> Vec<Bucket> {
    let mut b = Vec::new();

    for i in 1..101 {
        b.push(Bucket::new(format!("127.0.0.{i}:6443").parse().unwrap(), 1));
    }

    b
}

fn random_string() -> String {
    let mut rand = rng();
    Alphanumeric.sample_string(&mut rand, 30)
}

pub fn criterion_benchmark(c: &mut Criterion) {
    #[cfg(feature = "heap-prof")]
    let _profiler = dhat::Profiler::new_heap();

    c.bench_function("create_continuum", |b| {
        b.iter(|| Continuum::new(&buckets()))
    });

    c.bench_function("continuum_hash", |b| {
        let continuum = Continuum::new(&buckets());

        b.iter(|| continuum.node(random_string().as_bytes()))
    });
}

criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);


================================================
FILE: pingora-ketama/examples/health_aware_selector.rs
================================================
use log::info;
use pingora_ketama::{Bucket, Continuum};
use std::collections::HashMap;
use std::net::SocketAddr;

// A repository for node healthiness, emulating a health checker.
struct NodeHealthRepository {
    nodes: HashMap<SocketAddr, bool>,
}

impl NodeHealthRepository {
    fn new() -> Self {
        NodeHealthRepository {
            nodes: HashMap::new(),
        }
    }

    fn set_node_health(&mut self, node: SocketAddr, is_healthy: bool) {
        self.nodes.insert(node, is_healthy);
    }

    fn node_is_healthy(&self, node: &SocketAddr) -> bool {
        self.nodes.get(node).cloned().unwrap_or(false)
    }
}

// A health-aware node selector, which relies on the above health repository.
struct HealthAwareNodeSelector<'a> {
    ring: Continuum,
    max_tries: usize,
    node_health_repo: &'a NodeHealthRepository,
}

impl HealthAwareNodeSelector<'_> {
    fn new(r: Continuum, tries: usize, nhr: &NodeHealthRepository) -> HealthAwareNodeSelector<'_> {
        HealthAwareNodeSelector {
            ring: r,
            max_tries: tries,
            node_health_repo: nhr,
        }
    }

    // Try to select a node within <max_tries> attempts.
    fn try_select(&self, key: &str) -> Option<SocketAddr> {
        let node_iter = self.ring.node_iter(key.as_bytes());

        for (tries, node) in node_iter.enumerate() {
            if tries >= self.max_tries {
                break;
            }

            if self.node_health_repo.node_is_healthy(node) {
                return Some(*node);
            }
        }

        None
    }
}

// RUST_LOG=INFO cargo run --example health_aware_selector
fn main() {
    env_logger::init();

    // Set up some nodes.
    let buckets: Vec<_> = (1..=10)
        .map(|i| Bucket::new(format!("127.0.0.{i}:6443").parse().unwrap(), 1))
        .collect();

    // Mark the 1-5th nodes healthy, the 6-10th nodes unhealthy.
    let mut health_repo = NodeHealthRepository::new();
    (1..=10)
        .map(|i| (i, format!("127.0.0.{i}:6443").parse().unwrap()))
        .for_each(|(i, n)| {
            health_repo.set_node_health(n, i < 6);
        });

    // Create a health-aware selector with up to 3 tries.
    let health_aware_selector =
        HealthAwareNodeSelector::new(Continuum::new(&buckets), 3, &health_repo);

    // Let's try the selector on a few keys.
    for i in 0..5 {
        let key = format!("key_{i}");
        match health_aware_selector.try_select(&key) {
            Some(node) => {
                info!("{key}: {}:{}", node.ip(), node.port());
            }
            None => {
                info!("{key}: no healthy node found!");
            }
        }
    }
}


================================================
FILE: pingora-ketama/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # pingora-ketama
//! A Rust port of the nginx consistent hashing algorithm.
//!
//! This crate provides a consistent hashing algorithm which is identical in
//! behavior to [nginx consistent hashing](https://www.nginx.com/resources/wiki/modules/consistent_hash/).
//!
//! Using a consistent hash strategy like this is useful when one wants to
//! minimize the amount of requests that need to be rehashed to different nodes
//! when a node is added or removed.
//!
//! Here's a simple example of how one might use it:
//!
//! ```
//! use pingora_ketama::{Bucket, Continuum};
//!
//! # #[allow(clippy::needless_doctest_main)]
//! fn main() {
//!     // Set up a continuum with a few nodes of various weight.
//!     let mut buckets = vec![];
//!     buckets.push(Bucket::new("127.0.0.1:12345".parse().unwrap(), 1));
//!     buckets.push(Bucket::new("127.0.0.2:12345".parse().unwrap(), 2));
//!     buckets.push(Bucket::new("127.0.0.3:12345".parse().unwrap(), 3));
//!     let ring = Continuum::new(&buckets);
//!
//!     // Let's see what the result is for a few keys:
//!     for key in &["some_key", "another_key", "last_key"] {
//!         let node = ring.node(key.as_bytes()).unwrap();
//!         println!("{}: {}:{}", key, node.ip(), node.port());
//!     }
//! }
//! ```
//!
//! ```bash
//! # Output:
//! some_key: 127.0.0.3:12345
//! another_key: 127.0.0.3:12345
//! last_key: 127.0.0.2:12345
//! ```
//!
//! We've provided a health-aware example in
//! `pingora-ketama/examples/health_aware_selector.rs`.
//!
//! For a carefully crafted real-world example, see the [`pingora-load-balancing`](https://docs.rs/pingora-load-balancing)
//! crate.

use std::cmp::Ordering;
use std::io::Write;
use std::net::SocketAddr;

use crc32fast::Hasher;
#[cfg(feature = "v2")]
use i_key_sort::sort::one_key_cmp::OneKeyAndCmpSort;

/// This constant is copied from nginx. It will create 160 points per weight
/// unit. For example, a weight of 2 will create 320 points on the ring.
pub const DEFAULT_POINT_MULTIPLE: u32 = 160;

/// A [Bucket] represents a server for consistent hashing
///
/// A [Bucket] contains a [SocketAddr] to the server and a weight associated with it.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
pub struct Bucket {
    // The node name.
    // TODO: UDS
    node: SocketAddr,

    // The weight associated with a node. A higher weight indicates that this node should
    // receive more requests.
    weight: u32,
}

impl Bucket {
    /// Return a new bucket with the given node and weight.
    ///
    /// The chance that a [Bucket] is selected is proportional to the relative weight of all [Bucket]s.
    ///
    /// # Panics
    ///
    /// This will panic if the weight is zero.
    pub fn new(node: SocketAddr, weight: u32) -> Self {
        assert!(weight != 0, "weight must be at least one");

        Bucket { node, weight }
    }
}

// A point on the continuum.
#[derive(Clone, Debug, Eq, PartialEq)]
struct PointV1 {
    // the index to the actual address
    node: u32,
    hash: u32,
}

// We only want to compare the hash when sorting, so we implement these traits by hand.
impl Ord for PointV1 {
    fn cmp(&self, other: &Self) -> Ordering {
        self.hash.cmp(&other.hash)
    }
}

impl PartialOrd for PointV1 {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl PointV1 {
    fn new(node: u32, hash: u32) -> Self {
        PointV1 { node, hash }
    }
}

/// A point on the continuum.
///
/// We are trying to save memory here, so this struct is equivalent to a struct
/// this this definition, but doesn't require using the "untrustworthy" compact
/// repr. This does mean we have to do the memory layout manually though, but
/// the benchmarks show there is no performance hit for it.
///
/// #[repr(Rust, packed)]
/// struct Point {
///     node: u16,
///     hash: u32,
/// }
#[cfg(feature = "v2")]
#[derive(Copy, Clone, Eq, PartialEq)]
#[repr(transparent)]
struct PointV2([u8; 6]);

#[cfg(feature = "v2")]
impl PointV2 {
    fn new(node: u16, hash: u32) -> Self {
        let mut this = [0; 6];

        this[0..4].copy_from_slice(&hash.to_ne_bytes());
        this[4..6].copy_from_slice(&node.to_ne_bytes());

        Self(this)
    }

    /// Return the hash of the point which is stored in the first 4 bytes (big endian).
    fn hash(&self) -> u32 {
        u32::from_ne_bytes(self.0[0..4].try_into().expect("There are exactly 4 bytes"))
    }

    /// Return the node of the point which is stored in the last 2 bytes (big endian).
    fn node(&self) -> u16 {
        u16::from_ne_bytes(self.0[4..6].try_into().expect("There are exactly 2 bytes"))
    }
}

#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)]
pub enum Version {
    #[default]
    V1,
    #[cfg(feature = "v2")]
    V2 { point_multiple: u32 },
}

impl Version {
    fn point_multiple(&self) -> u32 {
        match self {
            Version::V1 => DEFAULT_POINT_MULTIPLE,
            #[cfg(feature = "v2")]
            Version::V2 { point_multiple } => *point_multiple,
        }
    }
}

enum RingBuilder {
    V1(Vec<PointV1>),
    #[cfg(feature = "v2")]
    V2(Vec<PointV2>),
}

impl RingBuilder {
    fn new(version: Version, total_weight: u32) -> Self {
        match version {
            Version::V1 => RingBuilder::V1(Vec::with_capacity(
                (total_weight * DEFAULT_POINT_MULTIPLE) as usize,
            )),
            #[cfg(feature = "v2")]
            Version::V2 { point_multiple } => {
                RingBuilder::V2(Vec::with_capacity((total_weight * point_multiple) as usize))
            }
        }
    }

    fn push(&mut self, node: u16, hash: u32) {
        match self {
            RingBuilder::V1(ring) => {
                ring.push(PointV1::new(node as u32, hash));
            }
            #[cfg(feature = "v2")]
            RingBuilder::V2(ring) => {
                ring.push(PointV2::new(node, hash));
            }
        }
    }

    #[allow(unused)]
    fn sort(&mut self, addresses: &[SocketAddr]) {
        match self {
            RingBuilder::V1(ring) => {
                // Sort and remove any duplicates.
                ring.sort_unstable();
                ring.dedup_by(|a, b| a.hash == b.hash);
            }
            #[cfg(feature = "v2")]
            RingBuilder::V2(ring) => {
                ring.sort_by_one_key_then_by(
                    true,
                    |p| p.hash(),
                    |p1, p2| addresses[p1.node() as usize].cmp(&addresses[p2.node() as usize]),
                );

                //secondary_radix_sort(ring, |p| p.hash(), |p| addresses[p.node() as usize]);
                ring.dedup_by(|a, b| a.0[0..4] == b.0[0..4]);
            }
        }
    }
}

impl From<RingBuilder> for VersionedRing {
    fn from(ring: RingBuilder) -> Self {
        match ring {
            RingBuilder::V1(ring) => VersionedRing::V1(ring.into_boxed_slice()),
            #[cfg(feature = "v2")]
            RingBuilder::V2(ring) => VersionedRing::V2(ring.into_boxed_slice()),
        }
    }
}

enum VersionedRing {
    V1(Box<[PointV1]>),
    #[cfg(feature = "v2")]
    V2(Box<[PointV2]>),
}

impl VersionedRing {
    /// Find the associated index for the given input.
    pub fn node_idx(&self, hash: u32) -> usize {
        // The `Result` returned here is either a match or the error variant
        // returns where the value would be inserted.
        let search_result = match self {
            VersionedRing::V1(ring) => ring.binary_search_by(|p| p.hash.cmp(&hash)),
            #[cfg(feature = "v2")]
            VersionedRing::V2(ring) => ring.binary_search_by(|p| p.hash().cmp(&hash)),
        };

        match search_result {
            Ok(i) => i,
            Err(i) => {
                // We wrap around to the front if this value would be
                // inserted at the end.
                if i == self.len() {
                    0
                } else {
                    i
                }
            }
        }
    }

    pub fn get(&self, index: usize) -> Option<usize> {
        match self {
            VersionedRing::V1(ring) => ring.get(index).map(|p| p.node as usize),
            #[cfg(feature = "v2")]
            VersionedRing::V2(ring) => ring.get(index).map(|p| p.node() as usize),
        }
    }

    pub fn len(&self) -> usize {
        match self {
            VersionedRing::V1(ring) => ring.len(),
            #[cfg(feature = "v2")]
            VersionedRing::V2(ring) => ring.len(),
        }
    }
}

/// The consistent hashing ring
///
/// A [Continuum] represents a ring of buckets where a node is associated with various points on
/// the ring.
pub struct Continuum {
    ring: VersionedRing,
    addrs: Box<[SocketAddr]>,
}

impl Continuum {
    pub fn new(buckets: &[Bucket]) -> Self {
        Self::new_with_version(buckets, Version::default())
    }

    /// Create a new [Continuum] with the given list of buckets.
    pub fn new_with_version(buckets: &[Bucket], version: Version) -> Self {
        if buckets.is_empty() {
            return Continuum {
                ring: VersionedRing::V1(Box::new([])),
                addrs: Box::new([]),
            };
        }

        // The total weight is multiplied by the factor of points to create many points per node.
        let total_weight: u32 = buckets.iter().fold(0, |sum, b| sum + b.weight);
        let mut ring = RingBuilder::new(version, total_weight);
        let mut addrs = Vec::with_capacity(buckets.len());

        for bucket in buckets {
            let mut hasher = Hasher::new();

            // We only do the following for backwards compatibility with nginx/memcache:
            // - Convert SocketAddr to string
            // - The hash input is as follows "HOST EMPTY PORT PREVIOUS_HASH". Spaces are only added
            //   for readability.
            // TODO: remove this logic and hash the literal SocketAddr once we no longer
            // need backwards compatibility

            // with_capacity = max_len(ipv6)(39) + len(null)(1) + max_len(port)(5)
            let mut hash_bytes = Vec::with_capacity(39 + 1 + 5);
            write!(&mut hash_bytes, "{}", bucket.node.ip()).unwrap();
            write!(&mut hash_bytes, "\0").unwrap();
            write!(&mut hash_bytes, "{}", bucket.node.port()).unwrap();
            hasher.update(hash_bytes.as_ref());

            // A higher weight will add more points for this node.
            let num_points = bucket.weight * version.point_multiple();

            // This is appended to the crc32 hash for each point.
            let mut prev_hash: u32 = 0;
            addrs.push(bucket.node);
            let node = addrs.len() - 1;
            for _ in 0..num_points {
                let mut hasher = hasher.clone();
                hasher.update(&prev_hash.to_le_bytes());

                let hash = hasher.finalize();
                ring.push(node as u16, hash);
                prev_hash = hash;
            }
        }

        let addrs = addrs.into_boxed_slice();

        // Sort and remove any duplicates.
        ring.sort(&addrs);

        Continuum {
            ring: ring.into(),
            addrs,
        }
    }

    /// Find the associated index for the given input.
    pub fn node_idx(&self, input: &[u8]) -> usize {
        let hash = crc32fast::hash(input);
        self.ring.node_idx(hash)
    }

    /// Hash the given `hash_key` to the server address.
    pub fn node(&self, hash_key: &[u8]) -> Option<SocketAddr> {
        self.ring
            .get(self.node_idx(hash_key)) // should we unwrap here?
            .map(|n| self.addrs[n])
    }

    /// Get an iterator of nodes starting at the original hashed node of the `hash_key`.
    ///
    /// This function is useful to find failover servers if the original ones are offline, which is
    /// cheaper than rebuilding the entire hash ring.
    pub fn node_iter(&self, hash_key: &[u8]) -> NodeIterator<'_> {
        NodeIterator {
            idx: self.node_idx(hash_key),
            continuum: self,
        }
    }

    pub fn get_addr(&self, idx: &mut usize) -> Option<&SocketAddr> {
        let point = self.ring.get(*idx);
        if point.is_some() {
            // only update idx for non-empty ring otherwise we will panic on modulo 0
            *idx = (*idx + 1) % self.ring.len();
        }
        point.map(|n| &self.addrs[n])
    }
}

/// Iterator over a Continuum
pub struct NodeIterator<'a> {
    idx: usize,
    continuum: &'a Continuum,
}

impl<'a> Iterator for NodeIterator<'a> {
    type Item = &'a SocketAddr;

    fn next(&mut self) -> Option<Self::Item> {
        self.continuum.get_addr(&mut self.idx)
    }
}

#[cfg(test)]
mod tests {
    use std::net::SocketAddr;
    use std::path::Path;

    use super::{Bucket, Continuum};

    fn get_sockaddr(ip: &str) -> SocketAddr {
        ip.parse().unwrap()
    }

    #[test]
    fn consistency_after_adding_host() {
        fn assert_hosts(c: &Continuum) {
            assert_eq!(c.node(b"a"), Some(get_sockaddr("127.0.0.10:6443")));
            assert_eq!(c.node(b"b"), Some(get_sockaddr("127.0.0.5:6443")));
        }

        let buckets: Vec<_> = (1..11)
            .map(|u| Bucket::new(get_sockaddr(&format!("127.0.0.{u}:6443")), 1))
            .collect();
        let c = Continuum::new(&buckets);
        assert_hosts(&c);

        // Now add a new host and ensure that the hosts don't get shuffled.
        let buckets: Vec<_> = (1..12)
            .map(|u| Bucket::new(get_sockaddr(&format!("127.0.0.{u}:6443")), 1))
            .collect();

        let c = Continuum::new(&buckets);
        assert_hosts(&c);
    }

    #[test]
    fn matches_nginx_sample() {
        let upstream_hosts = ["127.0.0.1:7777", "127.0.0.1:7778"];
        let upstream_hosts = upstream_hosts.iter().map(|i| get_sockaddr(i));

        let mut buckets = Vec::new();
        for upstream in upstream_hosts {
            buckets.push(Bucket::new(upstream, 1));
        }

        let c = Continuum::new(&buckets);

        assert_eq!(c.node(b"/some/path"), Some(get_sockaddr("127.0.0.1:7778")));
        assert_eq!(
            c.node(b"/some/longer/path"),
            Some(get_sockaddr("127.0.0.1:7777"))
        );
        assert_eq!(
            c.node(b"/sad/zaidoon"),
            Some(get_sockaddr("127.0.0.1:7778"))
        );
        assert_eq!(c.node(b"/g"), Some(get_sockaddr("127.0.0.1:7777")));
        assert_eq!(
            c.node(b"/pingora/team/is/cool/and/this/is/a/long/uri"),
            Some(get_sockaddr("127.0.0.1:7778"))
        );
        assert_eq!(
            c.node(b"/i/am/not/confident/in/this/code"),
            Some(get_sockaddr("127.0.0.1:7777"))
        );
    }

    #[test]
    fn matches_nginx_sample_data() {
        let upstream_hosts = [
            "10.0.0.1:443",
            "10.0.0.2:443",
            "10.0.0.3:443",
            "10.0.0.4:443",
            "10.0.0.5:443",
            "10.0.0.6:443",
            "10.0.0.7:443",
            "10.0.0.8:443",
            "10.0.0.9:443",
        ];
        let upstream_hosts = upstream_hosts.iter().map(|i| get_sockaddr(i));

        let mut buckets = Vec::new();
        for upstream in upstream_hosts {
            buckets.push(Bucket::new(upstream, 100));
        }

        let c = Continuum::new(&buckets);

        let path = Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("test-data")
            .join("sample-nginx-upstream.csv");

        let mut rdr = csv::ReaderBuilder::new()
            .has_headers(false)
            .from_path(path)
            .unwrap();

        for pair in rdr.records() {
            let pair = pair.unwrap();
            let uri = pair.get(0).unwrap();
            let upstream = pair.get(1).unwrap();

            let got = c.node(uri.as_bytes()).unwrap();
            assert_eq!(got, get_sockaddr(upstream));
        }
    }

    #[test]
    fn node_iter() {
        let upstream_hosts = ["127.0.0.1:7777", "127.0.0.1:7778", "127.0.0.1:7779"];
        let upstream_hosts = upstream_hosts.iter().map(|i| get_sockaddr(i));

        let mut buckets = Vec::new();
        for upstream in upstream_hosts {
            buckets.push(Bucket::new(upstream, 1));
        }

        let c = Continuum::new(&buckets);
        let mut iter = c.node_iter(b"doghash");
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7778")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7779")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7779")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7777")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7777")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7778")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7778")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7779")));

        // drop 127.0.0.1:7777
        let upstream_hosts = ["127.0.0.1:7777", "127.0.0.1:7779"];
        let upstream_hosts = upstream_hosts.iter().map(|i| get_sockaddr(i));

        let mut buckets = Vec::new();
        for upstream in upstream_hosts {
            buckets.push(Bucket::new(upstream, 1));
        }

        let c = Continuum::new(&buckets);
        let mut iter = c.node_iter(b"doghash");
        // 127.0.0.1:7778 nodes are gone now
        // assert_eq!(iter.next(), Some("127.0.0.1:7778"));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7779")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7779")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7777")));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7777")));
        // assert_eq!(iter.next(), Some("127.0.0.1:7778"));
        // assert_eq!(iter.next(), Some("127.0.0.1:7778"));
        assert_eq!(iter.next(), Some(&get_sockaddr("127.0.0.1:7779")));

        // assert infinite cycle
        let c = Continuum::new(&[Bucket::new(get_sockaddr("127.0.0.1:7777"), 1)]);
        let mut iter = c.node_iter(b"doghash");

        let start_idx = iter.idx;
        for _ in 0..c.ring.len() {
            assert!(iter.next().is_some());
        }
        // assert wrap around
        assert_eq!(start_idx, iter.idx);
    }

    #[test]
    fn test_empty() {
        let c = Continuum::new(&[]);
        assert!(c.node(b"doghash").is_none());

        let mut iter = c.node_iter(b"doghash");
        assert!(iter.next().is_none());
        assert!(iter.next().is_none());
        assert!(iter.next().is_none());
    }

    #[test]
    fn test_ipv6_ring() {
        let upstream_hosts = ["[::1]:7777", "[::1]:7778", "[::1]:7779"];
        let upstream_hosts = upstream_hosts.iter().map(|i| get_sockaddr(i));

        let mut buckets = Vec::new();
        for upstream in upstream_hosts {
            buckets.push(Bucket::new(upstream, 1));
        }

        let c = Continuum::new(&buckets);
        let mut iter = c.node_iter(b"doghash");
        assert_eq!(iter.next(), Some(&get_sockaddr("[::1]:7777")));
        assert_eq!(iter.next(), Some(&get_sockaddr("[::1]:7778")));
        assert_eq!(iter.next(), Some(&get_sockaddr("[::1]:7777")));
        assert_eq!(iter.next(), Some(&get_sockaddr("[::1]:7778")));
        assert_eq!(iter.next(), Some(&get_sockaddr("[::1]:7778")));
        assert_eq!(iter.next(), Some(&get_sockaddr("[::1]:7777")));
        assert_eq!(iter.next(), Some(&get_sockaddr("[::1]:7779")));
    }
}


================================================
FILE: pingora-ketama/test-data/README.md
================================================
# Steps to generate nginx upstream ketama hash logs

1. Prepare nginx conf
```
mkdir -p /tmp/nginx-ketama/logs
cp nginx.conf /tmp/nginx-ketama
nginx -t -c nginx.conf -p /tmp/nginx-ketama
```

2. Generate trace
```
./trace.sh
```

3. Collect trace
```
 cp /tmp/nginx-ketama/logs/access.log ./sample-nginx-upstream.csv
```

================================================
FILE: pingora-ketama/test-data/nginx.conf
================================================
events {}
http {
    log_format upper '$request_uri,$upstream_addr';

    upstream uppers {
        hash $request_uri consistent;

        server 10.0.0.1:443 weight=100 max_fails=0;
        server 10.0.0.2:443 weight=100 max_fails=0;
        server 10.0.0.3:443 weight=100 max_fails=0;
        server 10.0.0.4:443 weight=100 max_fails=0;
        server 10.0.0.5:443 weight=100 max_fails=0;
        server 10.0.0.6:443 weight=100 max_fails=0;
        server 10.0.0.7:443 weight=100 max_fails=0;
        server 10.0.0.8:443 weight=100 max_fails=0;
        server 10.0.0.9:443 weight=100 max_fails=0;
    }

    server {
        listen 127.0.0.1:8080;

        location / {
            access_log /tmp/nginx-ketama/logs/access.log upper;
            proxy_connect_timeout 5ms;
            proxy_next_upstream off;
            proxy_pass http://uppers;
        }
    }
}

================================================
FILE: pingora-ketama/test-data/sample-nginx-upstream.csv
================================================
/81fa1d251d605775d647b5b55565e71526d4cef6,10.0.0.7:443
/2fec328e6ccdda6a7edf329f9f780e546ea183b4,10.0.0.5:443
/19fb835d90883a6263ec4279c6da184e3f1a79b2,10.0.0.4:443
/da7a88e542f7aaddc074f988164b9df7e5f7fea6,10.0.0.4:443
/8f87cfd8005306643b6528b3d4125cf005139a7e,10.0.0.5:443
/26d2769eab098458bc3e4e641a4b7d8abffd0aea,10.0.0.6:443
/aa5b5323980f2d3e21246212ebd820c3949c1e88,10.0.0.7:443
/d9c4bc3cc4517c629e8f4c911c2fd8baf260ae65,10.0.0.1:443
/28c1c069a2904bb3b3e0f9731b1ff8de9ab7a76d,10.0.0.4:443
/fe5199bdfeee5cd431ae7e9f77f178164f9995a0,10.0.0.9:443
/43992eee187920c5e8695332f71ca6e23ef6ac4b,10.0.0.3:443
/38528aab753a6f32de86b5a7acdbb0c885137a81,10.0.0.9:443
/12d4b9155ff599c0ac554226796b58a2278b450f,10.0.0.7:443
/9c34c9a4f9009997dd29c6e6a627b0aca7beb6e5,10.0.0.5:443
/eb5a2ab55796afd673874fd7560f1329be5540bd,10.0.0.9:443
/ad7b5395766b77098c3f212043650a805b622ffe,10.0.0.3:443
/c72fedf4177499635302849496898fe4f3409cc1,10.0.0.9:443
/77766138aaf0c016bdd1f6b996177fc8ca1d2204,10.0.0.8:443
/860c86b94e04f2648fb164c87fd6166707fd08ff,10.0.0.6:443
/1b419454e4eb63ef915e8e06cc11110a3ccd607e,10.0.0.7:443
/a8762dc488e1a1af31e53af8ddb887d4f3cca990,10.0.0.8:443
/2e8e8e8fdeada0bbd33ba57d20209b4d9343f965,10.0.0.4:443
/0220fa8b9a256e7fcf823097759aa3c44e6390e3,10.0.0.6:443
/418c1c554186b78c11de89227fbc24ef128bce54,10.0.0.8:443
/bc86e565b76f8e6f560064b02ab26529b6064571,10.0.0.3:443
/5c6a9b50df69956bd2b937ce7871ba6d67678db6,10.0.0.5:443
/5726f95dd0b1b145ad1a06755580f42fea41ac2a,10.0.0.9:443
/db601a7f7e24504b820e5ef5276b2653ec6c17d9,10.0.0.4:443
/f428a38a0d3dbbb12d475aa8f5be917147175eaf,10.0.0.6:443
/b815ca5871d52098946eded8a3382d086747818f,10.0.0.1:443
/fc61e21e21c6c0a9e03807a2cad7c1e79a104786,10.0.0.1:443
/8278c52b97c1e805c1c7c1a62123ca0a87e2ea2a,10.0.0.8:443
/668fd6d99bfb50b85b0928a8915761be2ca19089,10.0.0.2:443
/fefbfb22035c938b44d305dbb71b11d531257af8,10.0.0.2:443
/c30b287269464a75cf76a603145a7e44b83c8bde,10.0.0.5:443
/7584dbc60619230cb5a315cfdd3760fe2e2980c3,10.0.0.9:443
/399b3bdce88319bdba1b6b310cfcbd9db9cec234,10.0.0.6:443
/5edc91979f6f38dbbe00544d97d617b92b3df93d,10.0.0.9:443
/ac740e2450803d9b6042a3a98e5fe16eaad536e6,10.0.0.1:443
/46013f26dbbde9c25de5fcbb92ff331d5614bae8,10.0.0.5:443
/f109862c7c78e8ce087aeff9f2368d54d91fd3be,10.0.0.5:443
/fdc13a7011bbcf36b232adde4c610f0f35e9147e,10.0.0.3:443
/8387a3c076e525cae448c6a3b22988a2f37a98fc,10.0.0.1:443
/b4739e36d8e7eba1a400925c928caf0741b1a92a,10.0.0.1:443
/d92612bb3f678d8b181fa176e0af7227bf5f7e42,10.0.0.9:443
/89ec56b1d8d72c888b044e8cd7fa51b9ac726a41,10.0.0.2:443
/7cf921d8181af6912676f20c3d961d3f2ffbad20,10.0.0.3:443
/9181876c839cf16fd7c8c858b7afdc0178fb9500,10.0.0.3:443
/1034a4394566c826888f813af75c396fe8082b43,10.0.0.3:443
/81ac831667e89c2c6b3c6098b598d99eb1ce2b20,10.0.0.2:443
/d9dbae8a03a430b8d9cbffcf622b4e379bc89bf6,10.0.0.7:443
/c67776793fdcf7553fe0cb6414bb9dafe0216911,10.0.0.6:443
/1ee25559aa4aaa11ec1b3d2cc8645ed05ec001b3,10.0.0.9:443
/580180a2b85efff1a393ea2449ae271148ca2770,10.0.0.2:443
/84e1a1904a52e43ace344346032daca4e1bb69d6,10.0.0.8:443
/9cd06ffa608a252a30d935d2ebf10eceda06ba2e,10.0.0.6:443
/cf85a0000f38ac5346ddddd8cc0c28a054bbe60c,10.0.0.5:443
/c31f22b05514e380dd4430086486dc3ba4e36ed4,10.0.0.6:443
/336fdd336fde2bde2e0132d4be65088953036175,10.0.0.7:443
/cb1e7e2c425607defdd725e81ca3121340dbc8bb,10.0.0.8:443
/7bd85bb6826eeb30a67a999bfdeb6f6368954a3d,10.0.0.5:443
/bb542ca4f154437b0fa394b3be8d45350efc4955,10.0.0.8:443
/53e425848829e3aeb1c6991512e1951145b2ce46,10.0.0.6:443
/a6ad65c1bcacb876b76165e741f35c98a09cbbf3,10.0.0.3:443
/1fca16e96a89623e2ef7a93fccd767c4ef2a7905,10.0.0.9:443
/b9ad129954c11aa1491552845199c2fb4bbff25e,10.0.0.2:443
/9c0380f918aeb44664929447077ee992894cb754,10.0.0.9:443
/a9aeb4e3fb0b2358f70a6d9c2ad62409a7c24574,10.0.0.5:443
/8d563416df0c167343d295f889d64dd9ff213a9e,10.0.0.7:443
/71ddc6cc8f25f63ad7df9ad963beb9a14ca6b76f,10.0.0.2:443
/1dd61ea19da5970147129b0ba635338bc93c7aba,10.0.0.7:443
/2c019dd0aebfdf9d94fb1201b25f443c91c034f8,10.0.0.8:443
/636b620e6d548492a0fac32e895fa64ab48fa70d,10.0.0.1:443
/e26420a446174c0bcbc008f3d8ce97570d55619e,10.0.0.7:443
/2522d660a63527ab2f74c7a167366bbb0bc46cb1,10.0.0.6:443
/6e585c3e88aeb95554f5c00730c70d71189a12c6,10.0.0.1:443
/0bc50da77b7cf3959612950d97564e91e5a0f3fa,10.0.0.9:443
/167872e2688593c6544c0855b76a99fd0f96bb69,10.0.0.8:443
/7842aa002d2416c4587d779bbea40f5983883a9d,10.0.0.1:443
/b3cdb310440af5a8a9788534e2a44e1df75fc0aa,10.0.0.2:443
/7c17fc177496c13dd1207388087ae1979603c886,10.0.0.5:443
/28865c3daa92ec1e3784c51e9aa70c78b902dfa6,10.0.0.3:443
/4b990fc439195c5e05cfea65a2453f23fc5bbf1a,10.0.0.5:443
/7261021a69a6478b0620315c231c4aa26fda2638,10.0.0.2:443
/d5caa3e251ad2dd28ba82c3dcb99bff6d368e2a0,10.0.0.1:443
/a8606508d178e519aa53f989ef60db8a0f3a2c2c,10.0.0.2:443
/eb797fcf3e5954c884b78360247e38566f7f674a,10.0.0.9:443
/289ced7bea19beee166cf4b07d31c8461975d4e4,10.0.0.6:443
/e563ce7e72b68097a6432f68f86ed6f40d040ac3,10.0.0.3:443
/ba22b6f2657746d3b8f802ab2303ffd4b040a73f,10.0.0.7:443
/5dbda23f45eb02ecc74e57905b9dc6eab6d9770c,10.0.0.9:443
/637691e12da247452c3a614f560001e263a9f85e,10.0.0.5:443
/b2e491e1528813c17dfc888c5039c9e3f40f9040,10.0.0.8:443
/a4575d09e2fcb4d42e214c33be25c2f1c10e8323,10.0.0.5:443
/d655e051b4f82c459b20afbd2ccca058e16ad3fa,10.0.0.2:443
/cdca39ce5deb7022702e18e0c6b61010ba931e54,10.0.0.9:443
/58b31129208a29d2435258dc9f24a6b851ed1ac0,10.0.0.6:443
/019930f0699b20a72a091c1042dfe33ac568b190,10.0.0.5:443
/f00117302e2daca8c81e68cb33cf445b72c45895,10.0.0.9:443
/da90cf74593ee181693910a40142bc79479c354e,10.0.0.5:443
/87654ba6f96f359e4418b3368ae2256a3c2dad51,10.0.0.2:443
/e85d0e6a90433b5a64257469c2cb4e441f39d07c,10.0.0.3:443
/8527e42c8677b3f8264a2a647c00eb3acc5d0207,10.0.0.1:443
/3adbb76ad6ae8a5342a5458e5f41ac4bdddb45fb,10.0.0.5:443
/96e7ecedc6c60f0b52869a98f9d192af1e72d329,10.0.0.9:443
/430095d6c47a7d2a8073e73df1c694fc9065e8f3,10.0.0.4:443
/475ce23ca92e83ebfbc781aa337063c6b034bfb6,10.0.0.3:443
/3a2cd1836406244cf08a552f60734872cfabfa1d,10.0.0.4:443
/47372a5cf6b640c32681f094dd588fa204839637,10.0.0.1:443
/74d7ecd706817756952727e82a5933549d582f68,10.0.0.4:443
/0c1ab68f17265ddc9a58577f2a3443b523508d2a,10.0.0.3:443
/e72871b3b2e08e87443995810c8fc542ec0c3b88,10.0.0.7:443
/20ffdb8b43d521aee3c81cbb668b94828bf3f86d,10.0.0.9:443
/b9a4b7d390a4fb62ea6252287351954ce6935fd2,10.0.0.9:443
/71f52570d9fa32e2df99088e44850fa9097804ec,10.0.0.6:443
/9533af016368e423dc90b4e249002233fa3fcd06,10.0.0.8:443
/23992435c60a48db0188097fb2f15826d99be05f,10.0.0.1:443
/bc351d376bcd7338aca33255199bfa3ced51d66b,10.0.0.5:443
/bc5a14bccb994f346069886be05ba91dc4cefacd,10.0.0.4:443
/6a29ff380492b77fe69f9ec0851cbbf7228d62f3,10.0.0.8:443
/99bbb0675c38e292e979110ac88fc7711edc92a2,10.0.0.7:443
/786105dc60dfffc8e2ea58679a14fd4428570d10,10.0.0.4:443
/d983235f5af78dc9b13a5d177a44c6a76c8fbb2c,10.0.0.8:443
/55163e01bc026cab4cf6985c8c2583876680aa80,10.0.0.2:443
/eb68e3145c8a531198ea2a60e7a4fe6cb1a2b78f,10.0.0.6:443
/7996a420a8e08545583a8ca0941c1a0c9ddc875c,10.0.0.9:443
/d8d3509e8df61eff246be4faa6630d5f11b81172,10.0.0.4:443
/ecd74f84dadcbb5e7ab90430ba424a996a5ec50f,10.0.0.7:443
/566ca8a48b0875bdf60d224188b0d952da6c8dc7,10.0.0.5:443
/0497f891fd6d35ffc0ed28dd3ba17eeba1301fa0,10.0.0.2:443
/6a406d220cbda7fad4facc04632fd0c12dc6d998,10.0.0.4:443
/3a54c0bfc41cd0942d0e479430cdbc551e33fb99,10.0.0.9:443
/a7a224cf1e0d9b4e5493b2f61fa53ad72de58b94,10.0.0.6:443
/4121200fe9e4e7c2126c5d71d108e5119f37783a,10.0.0.4:443
/caf4c4b46875bbfa63b9ab35a4bce5646ebd55b4,10.0.0.3:443
/90ad2be0a253536ab7c3e961443a91ded0e66e61,10.0.0.1:443
/caf569f41f3556f588fefc887d6ec0d454bfef8c,10.0.0.9:443
/0e3c3e157ffefdfa94e785d4a55f4eb6fca4dc70,10.0.0.2:443
/b0b8ba29e45725715f7982a05edac1ff999a7899,10.0.0.3:443
/cc5430ac1220fe146e68e9cf6f174269d403224d,10.0.0.7:443
/508445e1be7b2b4495f2eb5907530bb095e98ea7,10.0.0.5:443
/d6169d6f2495da4842a67163dcc0e5f31acb1a0c,10.0.0.3:443
/8d85ea8d983c0e35836b8a203660c6c919da645d,10.0.0.8:443
/ee5128bf7f95196d6569af52c9d99c4d60f132c6,10.0.0.7:443
/461d5e76ae9d26244e546eed7038efe6cf7d9bbd,10.0.0.2:443
/9f97615d8e9dea23c4c4e841838404fcd8698d8e,10.0.0.6:443
/c01e055c153b1d34d51c6598e2e1c3fc362d812e,10.0.0.8:443
/7c087772081d068f5fd86960e4d89901f3c06afe,10.0.0.2:443
/37e6e5c96c2661d244cbd243151f9c90119d5f4a,10.0.0.4:443
/663e532894288bb97751dda93f151d85f6c16813,10.0.0.7:443
/2b3904fd38fc96f184226c842f0643cd0596d865,10.0.0.3:443
/14cb69e56f7f17a26f0bdfce16dec5baf539dba0,10.0.0.8:443
/adbe42c7ca6dd63d976f49262cf3d1a27a5f7bb0,10.0.0.2:443
/70b58e27d6eb735c3c82d9aec1f6608f2f32195f,10.0.0.3:443
/e7d3683cca1dcc45d8e3fdfb54eddc9b34141d65,10.0.0.3:443
/407e3958ae8b94172af71487050ef5dc0aeab2ac,10.0.0.6:443
/4c5af9e573fc3e0120d322a950fcbb792074d670,10.0.0.7:443
/fe92a691ba1d11d6f49e5144be9baee390cc27e6,10.0.0.9:443
/298835604d35f371a68e93047c699a7c41375f97,10.0.0.6:443
/2155470425069f357851ba81346b879a8193aebb,10.0.0.3:443
/f55d45d265ec44be7ded0db1252281348fab75f0,10.0.0.4:443
/798f665aa334e5eb9a49669785e94da933d81f32,10.0.0.8:443
/ad8bf2624e7fc687b0130b61fdee9db2a2d865fd,10.0.0.7:443
/d2002a4943563ca4c4fc66b4ad65aac4e1410b2e,10.0.0.2:443
/a025e91fc9b3fcdc0491d0e4b4b0f09e322e53eb,10.0.0.6:443
/b4a46e8f0ca5698b4f6dd201b87e88125b153ece,10.0.0.4:443
/ff2a4976667b127ca1e3bb5027e8a836e56fd358,10.0.0.2:443
/307086130cdefaa3d899fca3dd9e77047fff1cf7,10.0.0.5:443
/558d5eeb99c6f1cfd6367fb101392072e5140c44,10.0.0.7:443
/a1a3799079c1ef01be067c4c6a1db5b7fe6515b1,10.0.0.4:443
/5b66932db9324bb9f8d6fc1f7be819c1c1ff43bd,10.0.0.5:443
/1d69b12d308183c0d6432fb4cb8bacbc86193830,10.0.0.8:443
/eef4c8b2ded3656c9d6174a72ffc487f0c769492,10.0.0.2:443
/eb439a2cd0e4c9fdd95d8c0f657a81ce20f96a0e,10.0.0.2:443
/b6f64c4a87c0d38417ce3dcc7a553a185df7f384,10.0.0.8:443
/393d62711ecc6309a19a96ea73cffae546922f64,10.0.0.8:443
/aa18663a595f369e048e33505f82d21ebbfe354d,10.0.0.9:443
/759754a69ee3e4449bacd21a5866b8434b743cfe,10.0.0.1:443
/c01e96c10fd69b430cf67edcc3fd2fec7ba30097,10.0.0.4:443
/284e0c7dbb8e7da2a1fd7180f8d542fbf2410767,10.0.0.3:443
/6f360332b72940cc117999224b5be35551a1790a,10.0.0.5:443
/a83eee32d7132975d5d2d2848bc7881345e63735,10.0.0.6:443
/9d8bfc97428dee1b1495d2568e5ac68b8ec7973d,10.0.0.1:443
/9e09d80d5653ac55445b42c091ada230ed96cf67,10.0.0.4:443
/6ca8d4fd764a20ca1b766f9d2a14b81011d80da4,10.0.0.5:443
/fb89be9d12828716f95a60d092f2a028c876259a,10.0.0.1:443
/29ffb1d20ace9afed20ce8613a2b636dae70638f,10.0.0.6:443
/b569fa1c31949a8ab05a60939d44b1132534556d,10.0.0.7:443
/71a89db0bb322607a2557b089a5d160fa574fc7d,10.0.0.1:443
/4449e3e6404cecdc9a36ecff54babedc84619b1c,10.0.0.2:443
/b26294352e342bd6e953264f9e14393413bb371d,10.0.0.2:443
/a72621f8691cf08ffdc5884556d5512a5ecd1f6e,10.0.0.4:443
/dc4732cfa991632b719def815b228ded96abaa1e,10.0.0.5:443
/b908128cca7c859493155441660eaaa09b2fae80,10.0.0.1:443
/d93c9304c07c8f1d2b6f6c89c882fc2cfad3fefe,10.0.0.4:443
/8a0db29dc8df0b7845a9ab213d4bd8ac59a121e8,10.0.0.7:443
/49559040bdef5e1a5dc8ee89f897b79115ef1bfe,10.0.0.7:443
/23428c6b465b7c43629bc28fa1a7431c6e541778,10.0.0.9:443
/9db1610e40a3197a5b8c2d0dee2b2ccfe4cabb92,10.0.0.3:443
/1c6cf23cac024d126066771bae7af48ba141dfd9,10.0.0.1:443
/5e89a982f7f165b47fef959e10c32afa1e01783e,10.0.0.1:443
/52644098601b604c9e9e5e3d1150f13e81240fc8,10.0.0.8:443
/1771afea4cf491711aa3b608fbd8b470306d7bc9,10.0.0.4:443
/825cb4d51b986eef44d3cba31dd87c4ce3d9c159,10.0.0.4:443
/83a6211a968db8d62e17525ce593c144ed7fbb4c,10.0.0.4:443
/6a9abd46a919eed40be39b9d53bd73cb74acf540,10.0.0.6:443
/12db006d907a255f8d61e5070d1a41defdae27ba,10.0.0.2:443
/0cf51c79b9d115d7be8fcc104e2f51fee1a3caa6,10.0.0.5:443
/6bbab5e098876a84c403ef8cbe9864c21f9bb0aa,10.0.0.4:443
/5fc725bf869cf190f8ce82814d5e8e749030c8cf,10.0.0.1:443
/859d96b17c00e528c07fe1696fc7ddfdb34c4875,10.0.0.7:443
/a55638df8b2ceca37d24bb78826833deb633c79d,10.0.0.2:443
/70ed2f73f55d4d00f9cf694a7f669c3ba11f89ed,10.0.0.3:443
/b5c910057d813197f8353c31d233de719212455a,10.0.0.5:443
/b602d274d7d8ff89505fb3ba364b6ccbeeb561ab,10.0.0.6:443
/50ba092d17178b78c2643e798138ff5514d2d0a2,10.0.0.1:443
/bf3244d6cec5c60aa29ccca799415354607b7803,10.0.0.9:443
/7f4ddcc20818c0db3cdd8b440c269e33ef22a7c7,10.0.0.4:443
/9dc2eaaf3539a7c0a5b97be1f722f544539c6257,10.0.0.2:443
/c5359e50f3c202f5cd5c096bd15d757ba659e815,10.0.0.5:443
/038366c13ffa60a0d9ef4bef212e6e7354a6bbfa,10.0.0.8:443
/9e40dac2f57fe43878519a83af3b75fc2e590217,10.0.0.6:443
/9b2c05c1d561f86cf9682673628dfef2160650a8,10.0.0.5:443
/78a2ea21a979d1d0c8e07f0185f358fe58393c12,10.0.0.5:443
/83d46e2ff9cd7bb557c1b00533a0e4f1733df84b,10.0.0.1:443
/29bf196e578a83824c55b0f78ceab36b1eb9c82b,10.0.0.4:443
/61249cd3d39f4dae802db5f0a875a5a4a8ad191d,10.0.0.1:443
/c7c7dfdf8e9e68d5540aae13b2cbb5fe86c1b965,10.0.0.6:443
/4be4e8d7897f7d9dfa210bd236e9bb45454fea20,10.0.0.9:443
/cb5ed875dedef2013fab5b051a8636d10fef56dc,10.0.0.6:443
/e12ec1f2b657ad0f7988db38254652e153525ad9,10.0.0.7:443
/9ec5a64e415451efcc8aa7648b284774361e03eb,10.0.0.7:443
/3a6afe9c8e8f041a59695055cb7733ae254632bd,10.0.0.7:443
/e3393950cb37481a7b00cbefc3298d14aeda0807,10.0.0.3:443
/7c6e41537748edb49cfc56ee505256f40935a99e,10.0.0.3:443
/6bbc445ff57bc9c54407f31616f1b23bf5ee27ce,10.0.0.5:443
/99ba1e8f21532dab31caf0731f1c5edc8455550b,10.0.0.5:443
/725fbb619d38c436bb88e28d5219e720989ab6db,10.0.0.4:443
/7b519ba8928f440bf01ac1d6b98611fb59bb1c89,10.0.0.8:443
/2ff8d8dd2a37ff1cb34692a00c7fb7d1c155b419,10.0.0.3:443
/f76abffc1a71b95e7969cceaad57429672beaf68,10.0.0.3:443
/fe58d58e116026db4cf106ef57732e1b629caade,10.0.0.6:443
/45549ca0d7c95e97c299b58b03ecf1939e140c9c,10.0.0.1:443
/93695453157442d799a007d1710f7dbf968be8f1,10.0.0.9:443
/ebe69b2ea9db3e66a2157021a17f852695eab8be,10.0.0.4:443
/a885aecaaf297eaac5c98ed708fe6a73fc9273b8,10.0.0.2:443
/2859256b987358b8d2ee0c81b5494cde3a98d602,10.0.0.1:443
/d19ae90e456730d2db6b36c1ed1a45335b368fc1,10.0.0.1:443
/f16f2e87bee62b1523dbb5824b5dfe338ec67704,10.0.0.8:443
/fcd5f91888014decb190a9dac5fe9fca7ed8d70f,10.0.0.9:443
/3ee610b32554b5f7c27d40a52bb982378ceb4fb6,10.0.0.7:443
/21cc5cb90ba59b6b743bc437f0f93c45d21aaea9,10.0.0.7:443
/8d2bffec729dd863e6dcdaeaefca22d6e29403bb,10.0.0.2:443
/2ce6b015ea081b69a3867f7b09b753f83fbd4b77,10.0.0.9:443
/64fcc9606275d6a259a084696318ab704a81932b,10.0.0.5:443
/0984409349566b9bda3f5ff3b0dae93c6979969c,10.0.0.6:443
/2b3775815cd0064c1603ec6dfe62b9ff54180638,10.0.0.5:443
/563ff0fa8762400c92ccb700adb6ea6a7bfb0d33,10.0.0.6:443
/901f7c9eca3f038ecf6a684a2c46b827c24e8ee6,10.0.0.5:443
/3dbd852fb7f851fda48f742488e51dfd8d4a472e,10.0.0.9:443
/a50ef8903707c1c5d7158a851d636ef65e198e7a,10.0.0.7:443
/92603aec7e7f7a5847f523c336bd80d786667d6f,10.0.0.4:443
/a941b070f313629549a2874fef17b29b25069214,10.0.0.1:443
/9a80624738b37b3a3d6b0749feae2bb82d0672c0,10.0.0.5:443
/f863b682f5f260f4762a14831d949c5dc9bd5f28,10.0.0.7:443
/d41f6919aa10ee037b4a69df874de03ccfc6432a,10.0.0.4:443
/e995303d36162db8650a2802ce0d52263c29ec0c,10.0.0.1:443
/7823ceab6e649edbb4f99d62282fe00edbe3acca,10.0.0.2:443
/bfd84f41dfe1d4470730d0aa41eb73b9d7461503,10.0.0.1:443
/53f7534ee600e63d0b32bbc1f2f9e4794373c4bb,10.0.0.7:443
/26f4c39897fdec0b453bc15860a45137064c4ef8,10.0.0.8:443
/7345179e10fa47e31faf60e165e7802f31315c56,10.0.0.8:443
/d47e4a2590ff8d5dd916d826adc3c20b9224a3de,10.0.0.8:443
/8ebb8b58c53468143f882b186fb64ef14e962c0a,10.0.0.4:443
/7fa7b9821ce360682b88b07fa27158af8d4b10bd,10.0.0.8:443
/7d3b908d960f61cf4944ac52164eaf9890c17c47,10.0.0.3:443
/3900dbeff282a20a6dc0b450581ae27f44230f75,10.0.0.7:443
/327a041d0576f11ba4c0fc677a8b1fa7cdd5b215,10.0.0.6:443
/20450e190c6b829846d1a67e43b2e57cf7e5b472,10.0.0.4:443
/d6d97ddf81c5a8f4f11b87198a3f8e75814d09ae,10.0.0.9:443
/48a468d706a7cc4b07c0e74695a9c2f64012b02a,10.0.0.2:443
/35903e2f79bf054b45d9f342642d488b85ec086f,10.0.0.2:443
/4198c731ac8a3638a955ae891498ea4071b2be10,10.0.0.7:443
/575be0ba8f57b2650f53499ab19fcf10aca1a467,10.0.0.9:443
/c211460d038ae3aeb286e759dbe99b9084c56fc1,10.0.0.6:443
/7d5071d6ed21ce66d8887ee6f88bf8b3145d417d,10.0.0.4:443
/77435459761c415127dac0d314fe73b728e93816,10.0.0.2:443
/16a401100431531a7cd8528d6ea8f957df584e4f,10.0.0.4:443
/9b9af306b3fb801bc4cb127118aee22f4678c6d0,10.0.0.2:443
/4902696d40151e903ec5bf810f2b82af7bf92799,10.0.0.3:443
/3207830ce45f38a326cba44a2bbed7ea7009e7f1,10.0.0.8:443
/002655dd3e576dd2be046915f365ee7947c77553,10.0.0.7:443
/8a316dc9861784929ae9283ff9edf50fcf2abb77,10.0.0.7:443
/8b2639c2cf4f75723ae219f9c8a60779e93b3a50,10.0.0.2:443
/d135a3f32a0eaec83386f9b8167c8b351fa0f9cd,10.0.0.5:443
/3cb5d50669030262c50b916b5e5f0ff112a23f87,10.0.0.5:443
/791d86b7b2c2860da849c6e20006b3f5f92714a1,10.0.0.3:443
/de08fde7bd93bfd844407842d09bc163675fbcbb,10.0.0.2:443
/0576ce89f317cf54673e20eb664bb8992c975a71,10.0.0.9:443
/fd7244f5203e2985e6c65ee07686cbd2a489e21c,10.0.0.6:443
/233de62d4ed3f6e6a8d847500ed8be500970bd0e,10.0.0.8:443
/8b8ec68415a7a9cbc426c23ba98ad165a434fa1c,10.0.0.6:443
/ec4230ec3e8fa6600907e777c94f2e59382b4542,10.0.0.7:443
/bd220769eedf9c7efa641de459810048891e3dc6,10.0.0.3:443
/9165254e59f4fad93b93a02210b25dbcaac4e0be,10.0.0.1:443
/0adb6ec07cfcd61534a065db496c7042e97391fe,10.0.0.5:443
/39a5c89484e21a243c7061d39dbd236c80d4ede3,10.0.0.3:443
/dc560955b3b817db3e79e37255cd18bd66a39a22,10.0.0.9:443
/9d433be2cca7907dae1b8c24900edc5adb6065bf,10.0.0.2:443
/2531e51eda6b68cc2faa7a09ad032387b2676523,10.0.0.9:443
/d591b928b7f89b00458ea30ef6f4fd20cd7e41c2,10.0.0.7:443
/9720475f8d148f70245ade435243bfba5a1ba559,10.0.0.4:443
/2544d73a3c1b0f04829284a5b425607f4f61ced7,10.0.0.2:443
/e3af59332ba621011d98fbf2a38c8a0b69b9ca79,10.0.0.5:443
/d8f4c58c0db28d7368f453d41abd59f6999b3ccc,10.0.0.8:443
/8a6180a589aec21a274a3f47781ccb1311b0833f,10.0.0.6:443
/83aca0a94c4883adb8e7ff795c1008ed59052691,10.0.0.7:443
/adc1ab7741effd4ece0e832c41d1fe69f5e1805c,10.0.0.6:443
/35a50236b60e680d3d968ad3857525a8649fd6a7,10.0.0.1:443
/30495b101ac5318458d74b3a286527e164efac53,10.0.0.9:443
/e2067038a82745a65406516f15817b63e328a825,10.0.0.2:443
/754c3e717cf1640d11ee1ca113571fd0ae55a0c2,10.0.0.9:443
/c4462289d891b8b0c0783041044908bd347a27a7,10.0.0.1:443
/68129869b04bc2255d2a17ce01afb14f1be73032,10.0.0.3:443
/683a4f8e369c5c3eeb85f0779aced10809bbdbb8,10.0.0.8:443
/8da89d7686976482713413835c889a7f289174a7,10.0.0.3:443
/511bef26cd42422c6f0c9bd33714a07b06dbb3e1,10.0.0.1:443
/b871c4e41b3eababead2aa4dbea87fef7161affa,10.0.0.8:443
/808da0dc7d4025a0858eec92ac72e9ffbff233c4,10.0.0.2:443
/6fee636398f916c4ba0074fc327f7b3dbf683a8b,10.0.0.2:443
/03ce09b1a7c7ae719f66f489841d0ff11635ffc5,10.0.0.3:443
/8653f568fca5173d6d274060692912676709981f,10.0.0.9:443
/8f8402b8bba56124ec6de0552c1844bd76bf72ea,10.0.0.4:443
/2c89ed1b71a52c9b0a9fa7909dc87d4d06237216,10.0.0.1:443
/5279a73f9dcfa562f13180791932598ed8a067f1,10.0.0.9:443
/456482cc45669a59fe8af5e49648a8079bc35c06,10.0.0.4:443
/90350e060dff6a507e69bd80c38629a2d9bf12b9,10.0.0.1:443
/2d5c624c50ba3ae06782861bba176e9b2f45f529,10.0.0.8:443
/67077af97e65ae301e88c6cd0e87c7ddb68fa9ef,10.0.0.6:443
/f017e954321efbfe4046942be0a1122d9be81d52,10.0.0.9:443
/0b74a181ce4b4f43023e4bc0acd7770f2867572f,10.0.0.5:443
/f40f8509ac9f73516224825e88a220ca02db2d81,10.0.0.7:443
/c2f12ede0ced03c9357a4fc5e05e9af5652433c4,10.0.0.9:443
/d3ec86f1dde7e9c416c88ddbabf854e21decec2d,10.0.0.7:443
/ea29487fa7e1c9e79ff0f257bfd8241736ddab9f,10.0.0.7:443
/b8f4ec5dee59a8693710cb95e1734900d7b6b076,10.0.0.4:443
/d0be164540802b86de0762ea266e03c8859ff70d,10.0.0.8:443
/cab9ed312a56db577bc36e4c2f52e84f8abb09ce,10.0.0.7:443
/62dfa34389964b03792842c09adce33e7decc837,10.0.0.5:443
/f653827f1289ae68efd5a0d057fbc172f8352842,10.0.0.8:443
/dbd9b9cf5affa501ddcd1a19eefa4240e311f94a,10.0.0.5:443
/3e74e167ef6393b6544b4e75da97f30f6c2e6477,10.0.0.6:443
/d004b31247c668c439fc8e491f71a69dfd35a55b,10.0.0.6:443
/17e79540d401ae73e7d666444feececf64602d23,10.0.0.8:443
/06c9cb78908d623842c4a7c7baae3d55009ffc43,10.0.0.1:443
/64427a2e50196a34670b9de8a4aebe44cbb26cc5,10.0.0.2:443
/802741e276f10186ee9b63d47af006e8bc3de516,10.0.0.1:443
/10cea480539356be3f2a2f14c05f057a60ef9b10,10.0.0.5:443
/17d5b6820d78727b781be06cbc7cd2a9be650794,10.0.0.8:443
/1c0f7ec3d8919ffd2ccb3312fb7d6d2e15cd3133,10.0.0.2:443
/ef3afb81312d46b826f033d9adf0c730996e7992,10.0.0.7:443
/080e45e7955e797bdc906af2fabeb8fbf2ac48e1,10.0.0.1:443
/c043b1a590f09716da25328fe0573c8e2e9c0bdc,10.0.0.9:443
/df604b478f31f11b4cb291b1a393749ce4e72ef3,10.0.0.7:443
/32a8a7a0678c834e2cc7ec0584cd193fd1fd91e5,10.0.0.5:443
/8303fbffee5f38f8eb4a51f3c1255de830abce34,10.0.0.5:443
/24c16585ac0791c7aa1d8a16bea2fae9e7008cc5,10.0.0.5:443
/0059d4d899d1e961f249a060d91e932a89bc9b4e,10.0.0.4:443
/f1edc74510b17b10cdff8801776d4eaf72a1cf0c,10.0.0.2:443
/faaec2162ff441c490fd2dc0640bf2c941438995,10.0.0.3:443
/1159fc0cd3e683f92fc649aad3a4bcc34564c3a3,10.0.0.3:443
/2ade445523d26c3b1519e699590939011036217c,10.0.0.7:443
/cc6b2c1b20ef1a63adf2afaf1207ecf446fb5719,10.0.0.6:443
/8cb65496c9ebd858fb1101d85cf35467c4a0be17,10.0.0.8:443
/38a5bad44dfc1ee47585be27cbd9598959ea6caf,10.0.0.2:443
/c651109ffc0f2270596065009642fe3fe0529e60,10.0.0.5:443
/b71ae7d9c9d4b67677fec2e630313cd01cd130e5,10.0.0.2:443
/088d49f180d9a2211708c95d5e9d6986705c8d7e,10.0.0.4:443
/8e44a5bd8519aa7b6d85f01e7b01e1a2ca236b6c,10.0.0.6:443
/2526eec4890f03477261584646a0ed6def65f8f4,10.0.0.8:443
/3a70dff8627a20ee109fa8241d7da762a02e02b3,10.0.0.4:443
/eb999a0198002b52abca1fd424a44013989e1403,10.0.0.4:443
/22a12a10268929d77ef8dcdea96f8aa69ab92d8a,10.0.0.7:443
/b467e43c88f0e69b52e3d341fcee52198f90cf77,10.0.0.1:443
/c5245921b3f6a21adafd6598046957086edfbeea,10.0.0.7:443
/6229b444f64a7529dd956877c24b2d149a1debf7,10.0.0.8:443
/4f8b5505d9e817b39e0b68e196c62240acd07306,10.0.0.7:443
/8699dc12aa122266013234df69eb5e14d6282174,10.0.0.5:443
/094dc8a3111d132d294030b358c23e63ff2ad680,10.0.0.5:443
/3ecdf7b29e567a94e1548f0884f414af6539f974,10.0.0.4:443
/abb4e33382452c2f5c5d3d0b89f11cc2d497a3a9,10.0.0.9:443
/696adbeedd54670bdcca6356597b580dd9c4c42c,10.0.0.8:443
/940b2a895c5540f5dd079f7ba930962980bf4e77,10.0.0.7:443
/e3ad3f730ef0f82530373e966fc35e7521ba0fae,10.0.0.5:443
/e68f238fe1b9f3ebffb102e98ee2d958194f0c3e,10.0.0.2:443
/9e09afd63284f764392213c9f282e59c859ccc2a,10.0.0.3:443
/40d1f0a5f73460d07f777d16ce0edb5215e0af5b,10.0.0.5:443
/e641c93505fff2131a1bd6a2bcdeccc7ef56e108,10.0.0.5:443
/51f5ef58442e9176bf4ab0e3d2a31e3919314467,10.0.0.1:443
/5c752e882a791057babbc7c9d0fcd6f98249a90c,10.0.0.3:443
/1a93e7ac8192b94e62372c526a858203ab55f82c,10.0.0.9:443
/4f00d996e102527cfd906afe596b1fd20b9587e2,10.0.0.7:443
/cb33e35ff6809c1d9e87af168853c8779949df28,10.0.0.1:443
/8238f15b1556e4c5a2cfd3024ca22ca0cc38cf75,10.0.0.7:443
/4d9af33ca789f5675e0b3b7db89277d9f07fe487,10.0.0.9:443
/31f11a12c1729fbf56e7924ddeaef596d9246ffb,10.0.0.2:443
/dc5b466b915a7eb13aa3c28a6df11201defc4776,10.0.0.1:443
/a83dcc91951b9b9bfc86dd414ab2378dcb74bdb8,10.0.0.8:443
/68dcee7d8e5b5bf81c1acdec7dd97891ce3aca1c,10.0.0.3:443
/661f218fa06f2b92f22e0ab81731ac4029f4adc2,10.0.0.7:443
/cd29f2138dc83eeb5d4f16d7b13d0c81483959a2,10.0.0.9:443
/d34d0b880b1e7499c2133f06e9373f5ea4e841ce,10.0.0.6:443
/1fb666f29371b67fef8e44a9322c9e387261fa18,10.0.0.2:443
/808b776dc196d1759afc93646bbfca2c3e8074b3,10.0.0.2:443
/20888fbf1def43e3123b0a9e4eba7fd7d5f2a410,10.0.0.4:443
/d7cd6d38cbd5b94c432e15cd2f7502dbb306d757,10.0.0.9:443
/fc018aa6e4835271d8cd024ec3943115cf4e94b0,10.0.0.5:443
/27dc43e54f5d904746a36b482123346f44293b51,10.0.0.5:443
/ed60ccb18540ad211073c281aee0dfc31ccc942e,10.0.0.4:443
/5fe5f6c1b4d13ba7200d6a89c456f60eb36690f3,10.0.0.2:443
/e2e2680aff762da702475c5307a31779cab5595a,10.0.0.4:443
/b9a3d570d0f60b98af06bb2bb8f20bfefa8bc4f6,10.0.0.2:443
/6b476c8bb82dcf0a32e3999826a788c48fe83e7d,10.0.0.2:443
/80a371eb8a4a18395199455f8bde883ee548e4ba,10.0.0.3:443
/de87fa6ef77299db61ac048fc87ce6e3e39934bc,10.0.0.3:443
/52c42c3136fef68070d143e62366e47b5de255cf,10.0.0.4:443
/ce7ea948d35ff4f7409eb507cfd3fc6e3b7cf30a,10.0.0.6:443
/53e772cf7a78bda717c98a08b9081a40a03392da,10.0.0.7:443
/9bcb6eeb362e0f6c8c04325848cd93f1a4fb75b0,10.0.0.4:443
/2ef946b7546ba83f528e4af5f60a585fa22cd5e9,10.0.0.8:443
/a964b047adfe079bd4f39c5d79daac5317611bb2,10.0.0.2:443
/2cfad7113f585441f4fd054df57e08aa3f7d3441,10.0.0.8:443
/f5352d8e5f1e080bc70c241a8e65cb48690bf44d,10.0.0.8:443
/7108e1b4b2a8d73f6a892bf394d6db68eab4b06b,10.0.0.8:443
/5ce086abaaa907ea06ad20e029375d0a469f6688,10.0.0.3:443
/e8c9dabb0ae5ff95db4dc58d5e5ad8f49dbe1467,10.0.0.8:443
/3ea868d97d0f366875936dee5944d30f133a11ee,10.0.0.3:443
/4c34d28c12494292d3eb30c70e2ee5158ae45bcf,10.0.0.3:443
/b1367bfef3363e82dce33834cccd1b6178dd4e01,10.0.0.5:443
/e563665cf03942d218f963ac225b9f65f6f47e44,10.0.0.4:443
/ae4a64bd0638678f1f19b31f1610e800510308a8,10.0.0.3:443
/581ee1ca9a6cad9786fe37cba35c4809410a902f,10.0.0.2:443
/d8065f52a7d18810daaeef95f724c63a843f9354,10.0.0.7:443
/461f1d57aab6fa3c1bf72f5aa724ed264647c4d3,10.0.0.9:443
/70335780553fc4499c3d7b903b7e6fb6f06bc47b,10.0.0.5:443
/9733a49859bbd9b971475bd40e4a2bf7d9bfd203,10.0.0.3:443
/d3d04d2aa72354af7b52b344a07dcd22dca462b8,10.0.0.2:443
/c5ea5378e9a319cc2a455d6e296dd3ca5aa12477,10.0.0.6:443
/f20e309d5d7d97489cc0d5ad3737af32008968cb,10.0.0.6:443
/33d387521125b7825c8480755efce6e298e936a1,10.0.0.7:443
/a99b72b250bfb58ddf9d6fa9f805e2873ce0c229,10.0.0.8:443
/c8997a1855b1b4d8c6fd0a9dc21c01ffd7a1aa0d,10.0.0.4:443
/43b0d060a125208528829cbace2759c05655c8b2,10.0.0.5:443
/e609d51a78b4e83d2b778fef69d00f07a66e586c,10.0.0.4:443
/e62b9cead57079d3290ac764079b6466f7340c9f,10.0.0.2:443
/d31c487a361be7abb28a0b872d87fb41a907dd2b,10.0.0.9:443
/b17261bb106af0ee0cc6c710439eb26135dd5f5d,10.0.0.6:443
/14d92585f8702ef9ff0fd000907f00ca1f6a458c,10.0.0.6:443
/fe79953554bb5aaf34adc6928060af206e8fa993,10.0.0.2:443
/cefd63669258fb86a920a65d6fd1d511c508d954,10.0.0.6:443
/143c0259d1759575c859b902f4e56ef478624989,10.0.0.3:443
/50315f90fc6f82d29d925e3947ba7c32f19b5611,10.0.0.2:443
/95428f6c4dcfbb2a0c17a5de4861de01bd3f325f,10.0.0.7:443
/5ae459d770c6446c4d33104734b7a0293b48ce32,10.0.0.1:443
/7078c3416e5b78c65c318b3916b5be6d92896c88,10.0.0.4:443
/a489b81fdc1b4020b4bd3af8b759466c9023cbe2,10.0.0.4:443
/6a6b490f616c8b52c4548b3c7f46d527662bcb5e,10.0.0.4:443
/a607e4a8ef834a9ba7575729460fb697f5247cf8,10.0.0.2:443
/0ea9ba29578fd164f5942d037366c3c6768bdda2,10.0.0.4:443
/d9ba5006e81ac5128c20823155f4cfe991598179,10.0.0.4:443
/b691e1e01b2f64c9c584bcec928345657b95d293,10.0.0.7:443
/44710040aaa84b18986e3f16bc18e3c6ae63169b,10.0.0.7:443
/9ffc3d11bf5b67e6e316646e64f1b703dd79bd94,10.0.0.1:443
/1e2905288bed5c9b4c3f03cd6daeede56c2ffada,10.0.0.1:443
/1d54d79aa491a886dfda64a7780b04161d1481a0,10.0.0.4:443
/318d2d5e7b7347704e8a0552cbcb01adaef758a9,10.0.0.7:443
/22b7965fa39aba2195e1fdc1e845e01f378c3d51,10.0.0.8:443
/3cb017f3b5edf03f68ba84c5e8671e959273dd69,10.0.0.5:443
/60c48c28f0ffdbe1225b02ef2fabd26364126d73,10.0.0.9:443
/50330c984bb2167761149c43868e27401c678dd8,10.0.0.7:443
/ee359ac7b7e76cbc38f5a70cfb4a7323161e8519,10.0.0.9:443
/6dc489dc87cb4c097812bcbffa666f1d3a06920b,10.0.0.6:443
/b084378cffd7ba654dd33d13ae00915abddb5acb,10.0.0.8:443
/de8d58cf5b06da64685d35ea3d85692dd98443ba,10.0.0.2:443
/926ad3a9d1411e8f959f8a99270e534c2fcc60e7,10.0.0.1:443
/6f5efb290d2fc1f49af390123a8d45242535ba6c,10.0.0.1:443
/be0651f76c2d6b3fea26840eb42ff84603fdeb91,10.0.0.2:443
/664dfe003d4a68cd9502992896f6698ac2930b2f,10.0.0.2:443
/d5c1076ec326bbbf73e06d110ddec314bab4901e,10.0.0.6:443
/12bb31b94377990b52936138e755d30e1df5c4ad,10.0.0.9:443
/e4e3d4a7766b7ef0038e05c72d7c60c853902d78,10.0.0.6:443
/f1ace48a0bd8b225c5b05a4f310ad6146caab520,10.0.0.4:443
/d6f4e19fb0dd901454337ea0a62a671f7e731b1f,10.0.0.8:443
/e7e149ae5cc3b8d3432a74d2a5eff884304f149a,10.0.0.8:443
/55f9309e65ad34333635e2392170a2d9e8e80f62,10.0.0.4:443
/90cc256a42fe5c65d3b1c261b6713aac2d668405,10.0.0.1:443
/b9530da73b2c2287f52955290ec2dbc0e2cd197d,10.0.0.3:443
/45a8ce0efc2197c63f1b0feddd1d0ea3fb62cb62,10.0.0.3:443
/af5eb18f6287ead6b0d8d9d4f740916eef27b921,10.0.0.9:443
/7d4c22a4d647cfc95bc563ce3f0d56217623ea40,10.0.0.8:443
/6f6d1b8ccb3515ea7a4dfbba821b7e24f0af890e,10.0.0.8:443
/448153aa0816ea4c11fd433e4011279a9e911319,10.0.0.9:443
/e1db3b77b5891866a1bb0f136e9a3b6f1a8fd7ac,10.0.0.4:443
/8e5ddebd1bc756119aaa55d408583bf07c6947f4,10.0.0.1:443
/cb7c13c7fd7c12a0226dca64d01f154380b26ad5,10.0.0.1:443
/5b5987f50249480e48251ab6f848580d4dc69372,10.0.0.6:443
/a81f2d7b7e3640837acc6d0532aa52974371215d,10.0.0.6:443
/4fb00e1ae798e403dee221592d1b63d8a23ec7ee,10.0.0.4:443
/06ecb7c80ee19fc3dcdde75659cbf0fc1d03d5f7,10.0.0.1:443
/4c3644f13f8b64a3919d4c8a53387f102f8efbf1,10.0.0.1:443
/82d78d0be6134b4da73a81a442ce4c5277d23d8c,10.0.0.3:443
/968765803dd3e6461c0ff78f95342c0c113e4991,10.0.0.5:443
/b49bd6e5e66970eb7378c37dde945cf542601636,10.0.0.6:443
/7789c5109794ebfb0fe72e3bdc2fc29baa58e537,10.0.0.8:443
/29857840ad23d55c82c93cbf7971b416e118985b,10.0.0.7:443
/9aab0c35922eadd6e1cf5512af5272b20ff87638,10.0.0.6:443
/094cd8c29e8621928b163ba64c52752f9dfe5ce2,10.0.0.5:443
/f56b7ae1b06eff812326334a68cee675f3bf390b,10.0.0.5:443
/a5a029d6c0f758aab2eb8fb56594a0e2e118a8a4,10.0.0.4:443
/fe56c1fc75e419f220ea6bf06af6f5321c895f6d,10.0.0.6:443
/0d5b4bab87051120251a86b92db52e12582ffdd1,10.0.0.1:443
/34f43611722818ef7baa47b8d07067658765aab7,10.0.0.4:443
/e70ae66289144a1adae07ea70f507767d57e08d2,10.0.0.9:443
/3f10a40788eb0581f47d1f7ba7c57cdd6beef74f,10.0.0.5:443
/2d58723eabb6c5adfb1fa239e34c30da32f117a3,10.0.0.6:443
/efc167a6366a437ba465c1fc71ec28341082ce94,10.0.0.3:443
/0bfb80585b1a0fa49c2c863ab39dbe06599681b0,10.0.0.5:443
/57febded047c2a5455d7662d4967ac0666fb58bf,10.0.0.8:443
/f06e91822c1eb12e9c560453b2947d45c961ebd5,10.0.0.5:443
/2292f8823bd4364769bfdd6ef106c6186082a345,10.0.0.1:443
/dd37a086d19829dfff28195b3b63a197193f154c,10.0.0.1:443
/74f336292ef253b07abdce03a0f0d47ac6482062,10.0.0.8:443
/22eb9772d6538887bfc57bad65ef8a72d7ede1d6,10.0.0.7:443
/a1df865f8909812c4a83e29efe67460c6756bc17,10.0.0.5:443
/82f35bed949dc360e05c3a60f8d30970f9f30fcd,10.0.0.7:443
/e6d5dc89bb8d677be8c2dc6cc1f164a254e4436d,10.0.0.3:443
/dc64f2b2db6cbfb3129d83d0a5ca4d18d45ac35b,10.0.0.9:443
/d7b8e1a5f574730502d6328885ffbe76a93726cb,10.0.0.5:443
/e98e4eec7b294b9be385e3f87f5a5a86c4b202fd,10.0.0.7:443
/7c78c7e886c0a735c2043ceffd442e7a3df6de18,10.0.0.7:443
/53fcdac505c2e9d61d3df43336fbed32d4d52274,10.0.0.1:443
/e4e48c52272aeb71e75533c0447168a17dd0a8ac,10.0.0.2:443
/ea3361f8b5124bc65fe4273df0c03177066e8f1a,10.0.0.1:443
/7db8dcb86856a592128f019df1696d1bd8f0729f,10.0.0.9:443
/b8b33e4984ab558303db0b2481d8b6ab52bbeba6,10.0.0.1:443
/a61755fc46e10c87e457c3fe06a46ca261624b33,10.0.0.1:443
/74e9e54ea37b089a100561e9c89852a47d738657,10.0.0.1:443
/7daddba4e4b97331ab3e47976295aa522c7622bf,10.0.0.6:443
/9a4a68b8ccea854a22b1064b2b647270161df2b3,10.0.0.2:443
/5b42f6b8ac65a644c8203a0e1f44d452653d24c4,10.0.0.3:443
/2ae53235a7dd1ee0ae2e0187969e33daae6466e1,10.0.0.5:443
/cedffeffa4fedb95411a03e7abd3e7771982e5b8,10.0.0.6:443
/56a5eb3af8c0de47c03da5850dd5a41bca391bf4,10.0.0.9:443
/8770f96efc90e2c044835618c1fbc21f784148fb,10.0.0.4:443
/77b4bb2780a93a397dc469fd804b05a3fe6443a3,10.0.0.3:443
/666b5de9ea6b7768dc6638f1426b5292465de061,10.0.0.1:443
/793233793175f196f861bbd781cffedc1dfe1649,10.0.0.8:443
/6e6c4ca7b99531e8ed151f023d3f54dbc1aa72b9,10.0.0.1:443
/a144b632b672fed3d6d3cdc98d4b664993ae2c3d,10.0.0.7:443
/074d306725d881f0acd57ed9f08ab85976adef84,10.0.0.3:443
/c9c04880224d2977fa60a0eef06d1c795fe6a423,10.0.0.9:443
/d20ffc4abee9257d5dc1fca42c53f6db63a2a664,10.0.0.6:443
/316bdd0350db3c95c148e66a291f23d382251f92,10.0.0.3:443
/a91ec91455f9c64ff666f271a7697c41b6777c40,10.0.0.5:443
/ebaa896e268efc129dfb87e51c06b69c0d3d10b5,10.0.0.9:443
/baa5d67516c1a6346ab05f10821c7ad25d297cdf,10.0.0.7:443
/e8a049830e420d8502891a00eb55982ba0e9fd8a,10.0.0.2:443
/2885644f1735ca86b002e623d2ea6824977b3386,10.0.0.7:443
/85a0370ec94aca8895951954ca810d3b06b1f7af,10.0.0.4:443
/4eda15a4a13e529b2aea970a1b3591e537b8c906,10.0.0.3:443
/bfa3f4f77e6f65a87448d260eb7647b6e02cc8ca,10.0.0.5:443
/599ca6d3012a3aa53854fb5e0d25c303d622e8d3,10.0.0.6:443
/2332508d304442b6a6b4cebe557be963e31aa07b,10.0.0.3:443
/0c32f97e9ca195ab9eba167702fb7af9f9809bc2,10.0.0.6:443
/0c4ad0a80bdb18e6a82233a8ebf2103e72cda899,10.0.0.1:443
/56e0aaba9e15a92b4ef584d3e8ebe091c52e2f60,10.0.0.4:443
/0adf617e084382c2a23249aa985cd8b8acdaa65b,10.0.0.3:443
/ed18addfd26398806fca2a8a2fe14bbe090fe0e0,10.0.0.5:443
/d1a2ba53f3b93e000e644d5b1ad8c6854ea9f65d,10.0.0.6:443
/7107620d6a0be698aac43ffc25f86eae6f53a2e1,10.0.0.8:443
/3e4ae6d93c0251c47b243273c3cfa6a8e75561f1,10.0.0.8:443
/d4c0a3bd89b4dddf0a6ea0b0527290ea09eb5a9c,10.0.0.1:443
/152e9ed59081b05a44434e96acb10841a6ac4913,10.0.0.9:443
/37165697fc6ef78a2f77de34e332378708c645b8,10.0.0.6:443
/a4e2739e0c203344e3243ecd1085ae55c3e80e7a,10.0.0.8:443
/23795bea08d22a82aeb2fdde5cf561d439358287,10.0.0.8:443
/dfec4908e467ca09908796e4adaa92aa9483b5c3,10.0.0.3:443
/ee0b2b4442cf36ae3c29d01c815bfa6cfe48fdfc,10.0.0.1:443
/cb78173624dc4cb41a7b0237d17f7af6a083ea18,10.0.0.2:443
/d739f678a0da873018a2c748a92ee4719fce3a0f,10.0.0.4:443
/2323b27ff2ca01083ac32f731fbcd4d100e93e19,10.0.0.4:443
/d499e2ec08b34a0bf998c14c887b383265034c9f,10.0.0.9:443
/143084dfd65a8bdcd57fb64651f96008b040087b,10.0.0.6:443
/97eaf99e3029d498617ea5154d783a7eb5c70c48,10.0.0.4:443
/33a3ea00c963b4f990846722d410b76a95b8de58,10.0.0.7:443
/74102b50e78c7af16e679839dcbda2e0372b6007,10.0.0.7:443
/98e74ce3d20c7897ba902ddb716d77ccc12d7184,10.0.0.5:443
/04675a8dc4351a5aefbd2ca04b6fc110d396a968,10.0.0.6:443
/fc9d0c65157c42a6e4faaaaeeed021e97eba2829,10.0.0.1:443
/e79898b6d8400872a5e147b49e9a639756cbe950,10.0.0.4:443
/383d327d7129f3b712ff73766f8ce42c74e53b15,10.0.0.3:443
/210ecfe8a674dc8a5ce6e0ff2cecca5f78769a23,10.0.0.8:443
/0ceddf293af83e1c0b20a2f951c249dae970dac7,10.0.0.1:443
/35f0c2c9af248df96020a501f0cd01c35750cbc0,10.0.0.8:443
/52bc55a205fa328543f03294d9ed5417820fa601,10.0.0.6:443
/3e4431ffbb010af04dd776032e7ac6f59a3b104c,10.0.0.8:443
/4d9b2f4f469f2b2b5f3962306f97e75859c8d936,10.0.0.8:443
/56f073a5a58f509bd491f8884fb340a935f40308,10.0.0.4:443
/3c2b58f0da1000a8a78a3277b7fb5b1cfcd849a6,10.0.0.8:443
/928720a69173cbfedccbcfc04b198414b5164f00,10.0.0.6:443
/d87b50fec3d063f2a71d17c82d042c2abdcb7017,10.0.0.1:443
/3f56709086bde9189226f4c98d6debd284176644,10.0.0.2:443
/cdb098b71941c131bdafe7123c569453e08875b4,10.0.0.4:443
/75dc829b5562bf143582d8ebed2766892438fdf6,10.0.0.5:443
/e92003125b6bf378ea6279b52c744bbfa947c7ff,10.0.0.9:443
/43601fdc2b42ea10e6eddf5ad078d9959591b054,10.0.0.1:443
/4de1baf0df9a5653c30ba33552d107c8050ba65a,10.0.0.4:443
/d8d7b30cd8c473d9dfccf65626bdcaf093524fab,10.0.0.9:443
/44486486ce00b0c97d44bdaa8e9ad0cd4cef2f24,10.0.0.4:443
/2f2b76cd6ceb9d1942d5e9ef0f887e2447426de1,10.0.0.3:443
/0f4b330a57a82f31e00455ef74555b31f5bed5a5,10.0.0.3:443
/1ac2f42face0e1b71f40775e804816f562f5046b,10.0.0.4:443
/09d09a595f2bf13257e07133ac78186f35555584,10.0.0.5:443
/0110e2fe94b7def844f20606e2c8456d3502bccf,10.0.0.3:443
/e3b4ef341806cc37ca73c853f7fb946e1f5f6e8e,10.0.0.7:443
/38e28374b1a9f41bc73cdfce491eb2bb0bce6fbd,10.0.0.3:443
/d1713989a361ca1eed0f5f68fee5c50f91854b5f,10.0.0.3:443
/f23af6f472102edef6de9491f9a844070ba3029d,10.0.0.9:443
/f196a713839baae0bc51f4b1436927d36f2ff7d3,10.0.0.1:443
/7598e8238744d04076326a8e3597578f9ecc8c09,10.0.0.1:443
/59a02561ca435d78ab8aa7422feabf8773dec410,10.0.0.7:443
/ca62f7298625dc574326a50ad33a4538559fc47a,10.0.0.5:443
/04bb8594fabb6dab24d9913b98cf38d0563f83b7,10.0.0.6:443
/8841711058feec5187329165955a9c60407f83fc,10.0.0.4:443
/4d884355dd150df67499ef203c01635a229ba6c7,10.0.0.2:443
/071866a73e91187306c1e407f0115a4d4151aeb6,10.0.0.2:443
/afd57a92e88c0328fb968edd941b4100c6c9fb61,10.0.0.5:443
/531c345d706cd574388c37ad61ba205c406b1d59,10.0.0.3:443
/4502d8a61fb1a981fab9999a469dbac6e25e7f05,10.0.0.1:443
/e230fc4ffa3bc256581c8eadd173e1cbc73b0cfe,10.0.0.1:443
/05f427fdba3ff5f8d3039d3630772f161b8c9f14,10.0.0.3:443
/911e4045fe7a9e356c9a4a83edb7675844e71e1d,10.0.0.1:443
/6e23a5865d2058af3619068245ead5e51618e0c7,10.0.0.4:443
/d1a0e473ffe13801ea65171346ad99201c498078,10.0.0.3:443
/815a0974398d76788a4b3da7d0cca31f126d7de4,10.0.0.9:443
/4b5e04ef7002bbde1ab4076a838dec1de1d26f39,10.0.0.1:443
/524cccc30582e8a63d8312af6635e22baae5cc6c,10.0.0.6:443
/96b6e633b7128d91860de1ac281c3080fc3abafa,10.0.0.9:443
/bd9d0ab3293f97f2d0723f68fd28f15bd536c156,10.0.0.8:443
/47e8509e8d7eb07744a7bbb065e51a64af82fa8e,10.0.0.3:443
/7186ce2f19770360dfb9ecd97b89e16efac316d1,10.0.0.9:443
/eed6c4e6883b62cdc3a7b196d99273557b071dd2,10.0.0.2:443
/b96d9c4be72a2b9492c2d7317b95e9e8126b5c22,10.0.0.3:443
/548f0aba642aad540d75ba5179b4c4e0769d3f1c,10.0.0.9:443
/26584bcf23d47c9d064c9f80f8e428b694aca25d,10.0.0.9:443
/e2037c8f1d790b8c725d9b2935056043ecbd8e93,10.0.0.2:443
/7eba0adbc91b2e122b190de42e4560a2157b112d,10.0.0.8:443
/9146d10cd7ab98d06cd0f4b4197602778d6e4140,10.0.0.8:443
/9fa77d555984524b0361fb3072036e91cc6ea3f3,10.0.0.8:443
/52f48dffdc8736b0c0e71df9a3c0f80e88770725,10.0.0.3:443
/2265098f5d88d6d99acdb58cdcc1259d928625cc,10.0.0.9:443
/d6192927efab74c222d087ad8dc2e820dcb7f6ea,10.0.0.4:443
/18d1d3a2d5ed8d726aef5302787d369b79781866,10.0.0.4:443
/19c735d6358604129199dcd752ff41f4221fa57e,10.0.0.5:443
/b351359d0d09616a3fac0e66ae77e26357781753,10.0.0.6:443
/6dd6111dd3ca9ad06569393e50484d3347035fa0,10.0.0.6:443
/93e3738859d55cbde6df33ff6cb633a9f80dc0fa,10.0.0.9:443
/eedde9fe113eb083a5d3c2508057360ddc18686b,10.0.0.7:443
/62a3e2d15c1c66eb816f3dc13b5827984b9f699b,10.0.0.9:443
/f1b4e2ea07bb5ed2f47433b75d1f7c8ef5bbf7d8,10.0.0.9:443
/fa62a18b60e7385df8024532e7aeef87cc1df239,10.0.0.4:443
/7d1aa3efc0114d39a9f010b900e1db11151e639d,10.0.0.6:443
/f0468f9ae142e357977988238dd0688c49773aa0,10.0.0.4:443
/d8b5c9789b2aab6444a2ef1d8cef7ae774458ab8,10.0.0.4:443
/0a85a262365cc25072bfafe6136a7ba369566112,10.0.0.6:443
/4dded059c42653a7c1beaa541e63645bf551ad08,10.0.0.2:443
/49bd739af1ec870702c3847e967e8c6b435c5329,10.0.0.2:443
/8d95d2e2928f6c92b660ec1c1fa7df8ff813aec6,10.0.0.4:443
/93bf75656856b0a55da0773071dc39089dad6c98,10.0.0.3:443
/e3390ff69032d41cc118b525049af3a5b802fae7,10.0.0.2:443
/3150a053fd0dbbb2d248eebe951137ca8078f1d2,10.0.0.2:443
/3f9d225fb79822a7b27f99d8d201ca1a45bb2ab6,10.0.0.2:443
/452402b171258fcd3a5b107c1c939518102d2470,10.0.0.4:443
/08317e0decede1f945dc9e1fe173bb4f6fb8cfd6,10.0.0.3:443
/2cb847f4fbc26879a7b38289fb9add3bdc92a61b,10.0.0.4:443
/42ba7f638d09315bd314f1ecb6cbaba154074a68,10.0.0.5:443
/c0370eebe06344737bd403540f6adb13609b884f,10.0.0.8:443
/9c71526d82a2a81ac13581c5f93125202f95aff2,10.0.0.3:443
/e334fd8fafc2cb8809436c3e95e285d8c6079a94,10.0.0.7:443
/c396727f862d04377b9b67299231d1d64d3d208a,10.0.0.3:443
/6e51035a8d1f3bdd7f1732fd749ef13732a95944,10.0.0.8:443
/785266cb2e5abbf8e8d68ed6944c44bc8784e83d,10.0.0.7:443
/212094a923c2cc9abba620361a83ed32ffc89562,10.0.0.4:443
/139a4d3ff8c15c8bbe453dc8b51a4369a9761129,10.0.0.8:443
/990a8cbd6640ba4c5002b6e139b1ddc0784f687d,10.0.0.9:443
/a6fb42bfd3197c44719ed46faf9d90cb06548ad8,10.0.0.7:443
/000c6423f358a2c3d3f495b13d62c0d2c39f50bf,10.0.0.1:443
/1cec05fac01fdaacba7ce488edd944661542c006,10.0.0.3:443
/271f90733f7434e025ebc1f28267e53433857e1a,10.0.0.7:443
/39e38084ed794b098342c8e73c50eb6e8a473451,10.0.0.3:443
/391aed0b4c3552fe6045ee9e9fa5b1d749dad8f8,10.0.0.5:443
/6f08ffaed414be3b69721e7f6d32cdbf708af3b3,10.0.0.9:443
/e70ea65e0acabc146426f773ee43d27693b291cb,10.0.0.3:443
/b4e2cf556861071282564ee2f08e188fac4c5253,10.0.0.6:443
/6e68bc22256a8e097ae14c5309b968eac3ff115b,10.0.0.9:443
/47960ff89fcae7481243ffa5164fd694546f238a,10.0.0.3:443
/f574bef3f84ad106c19f59e9e9e02b531545144b,10.0.0.2:443
/4c3e44af33a9121771dfc8ec0f9315ac44133350,10.0.0.9:443
/b15cfc1da87f56a87bd209364583cb5ab32236dc,10.0.0.4:443
/40fa370749b2451bea54a014f1424acf8f26689f,10.0.0.5:443
/2b427b399d5020c4422e0795378c785ddb7294c2,10.0.0.1:443
/bc6adfe886d10fe6eafb996229582b4bfc8d222a,10.0.0.8:443
/f7b4e0538be13ab1bf2d07e6811a503a9017d9f3,10.0.0.2:443
/5e89704df5e29e7cf5d66780e8c21ee43976a437,10.0.0.4:443
/7b83fa2b9257a4c2d3130dca607ec04fb042a867,10.0.0.6:443
/a3b86bc98a2d6f3ffa6572d699127b0bddedfed8,10.0.0.8:443
/e4d0162d0642d1b8c48d405df85e19a38f0feeb3,10.0.0.7:443
/4d485cce05cbc8a437c1c0935d481a7bea42fca5,10.0.0.2:443
/845b67ac4a0134e07b0a024cc4e23712b6e6aa9b,10.0.0.2:443
/b79bb4986238b94afdd2cb060f5552eef7ff54e6,10.0.0.3:443
/af07fafd8a759a1cf5a609d578639be03caea26d,10.0.0.2:443
/81af00d61c1ef5b0613ad4beb2dd25f89efc6182,10.0.0.8:443
/593136c41899f34390c6d37d16edbbb9cfff7abf,10.0.0.7:443
/cf81d9fb40a3fc64d11770126f3a699d70eb0feb,10.0.0.7:443
/63c546d9b325c23c3861555b1e605cc203079992,10.0.0.7:443
/4ca3fa4d44be023ccfa77e4f77f68fa0b8ea7710,10.0.0.5:443
/e701444e1b1d9f079e9c879d8f999d88b56d68e4,10.0.0.5:443
/f6c473391fa230f190cdda902f793977ce7de8ad,10.0.0.4:443
/7bc73bd36bbd732b25e146e2f1b01003eb159775,10.0.0.6:443
/5f287e6957a09c1be550752505a5c1420ec8d3bc,10.0.0.1:443
/bebd964d7ca6eddee7de35715ebd96afa29fc8e2,10.0.0.2:443
/db7a5038c8f2d930653563a2e523683c0a93bc88,10.0.0.1:443
/47a642c6b0895911ca70f994f506c415505d75e1,10.0.0.9:443
/8c5554e66fa04d50fb84d5f49ace6de0852a34da,10.0.0.6:443
/89923f03ca432e5969fd0cdf5c15bb70b98efe2c,10.0.0.3:443
/7a0aa569021d8b2b2ce9bfcb07310c1ebfad5151,10.0.0.1:443
/0937e417478b8173a2bb86dc7b8b9454e059de4d,10.0.0.3:443
/188c99ab8d7267ee33a9a5acc8de76bd58898b45,10.0.0.2:443
/c4a8f4fb2483266c7166285df63f3640fa2319ae,10.0.0.8:443
/4b0d34f033113d93ecf921c6a0feb0b2d45fb126,10.0.0.9:443
/fb42de2cd7c51cacd89d7575bccd67f7ec25e451,10.0.0.4:443
/028a4c7b547864a379e75c95c587ce64202181dc,10.0.0.8:443
/cc5a526f4da916ea0a13b85e0c6e98b3d7437cc2,10.0.0.3:443
/9113f453c2c434907b4ff773fe5b86f21e382416,10.0.0.1:443
/830245306057d4c72845188ee1bcd3faec480fa2,10.0.0.1:443
/81d4b64d141924bb939f5379c79f68c8f73f6861,10.0.0.5:443
/be650948e3ec4fb93c66085f4ccf5b03ab52da7d,10.0.0.3:443
/dbf7fb2f6d1c71680d904a7982c255d38d9cb66d,10.0.0.6:443
/df2fd0964b001d47e7d7d8f704997f8b2c188e2b,10.0.0.3:443
/52599e6c628d3713b5a86776e7bf4e8eba08096b,10.0.0.8:443
/9a8dfa9f83cddf2f78d3dfaf2977d373250d08bc,10.0.0.6:443
/f6fff2fc64e81812c1fd6f44b17c0fca8f4d9b21,10.0.0.7:443
/c74b67ef81c0f0159ebf55bdb7cec7edbb49ef01,10.0.0.7:443
/1d489c78f83a538058f88a86d5d3b784a4c8c15b,10.0.0.3:443
/c19e2d12a05580a32ed36959def7a94542f3e4b2,10.0.0.2:443
/0ee2827f11b4ec40f5b9461cafbafc67c9bb19a4,10.0.0.6:443
/6fb7e55aa135073aafdaee252c0d41711f0012cf,10.0.0.2:443
/c31f4f1035ecf919c2a8ad9aaba8f34787b6dbfc,10.0.0.5:443
/da600775aba95b86e525802936ee1080854c9171,10.0.0.7:443
/713544fa205d177ae8892dcf355b3d9099d36346,10.0.0.9:443
/7c838304cffcd5141ea13dc98ebe74d3c9fb0b68,10.0.0.5:443
/1dfa23ab97268feb6b3fd019ff226fbcd628995c,10.0.0.2:443
/525a23553d91d72be5ae26127029e9e3bdfdb6d7,10.0.0.9:443
/963f18f8446ec2ab1921e0579c7af87477ef3592,10.0.0.3:443
/64b104d6efe390c5e8a008312e16772cc76632e8,10.0.0.2:443
/de3125db331f2c903e96f273a649f782abde3250,10.0.0.7:443
/fb2452ca4688e9fa8177919d72032717ac19d692,10.0.0.4:443
/76e8bbc123c7346c3398619f12d9888fee50811e,10.0.0.8:443
/d6f9d8096fe75dd35f317b7d1ce0bc02306f6121,10.0.0.3:443
/1e543c13b6af55fc60e9ba216790bccda9c2c3d1,10.0.0.2:443
/5f5831dbaeba200adb52eefeb26d792cf996a01f,10.0.0.3:443
/60ac99dd1fe31198399c6b447438e8ac62a35036,10.0.0.4:443
/579077bb8a893f47c1b1eca7d8cd2d62c8a3b774,10.0.0.2:443
/86590bffd69757efb277e2b6db69385bbb6ebba3,10.0.0.7:443
/f829ea6c46b59cde7ba3c6745b58c3ea170bc73b,10.0.0.3:443
/27f54b7138336175705207561c6a0a123049ab66,10.0.0.7:443
/fab8ba44a38c657d3b8f9066daac9f3222e3fd50,10.0.0.3:443
/03540d372c8e6621cf57ecb910cf872164f9c53a,10.0.0.9:443
/be320aecabb73286f782df249b53ef4f7c816b72,10.0.0.1:443
/7e383ecf09c026dec3eb9c77ca35274f209748a3,10.0.0.2:443
/749dd7577081e9d47a3bf9ef091b6e5b8daa5786,10.0.0.3:443
/9507f34faa73988fb0d35f65141b2e1b0f407cd7,10.0.0.3:443
/a2188b80856ef974da6eac0238a35afef2e1c30c,10.0.0.6:443
/1ca972ed159b7e8c838752ce60186262a923d057,10.0.0.4:443
/57e7abe6d05fe0443800929dcf46315bc5135519,10.0.0.4:443
/075a1e1426a5d210fd5501249809b1e20c793c92,10.0.0.2:443
/36c50cfcf56f7e15f91844b9f3201efa4cc26fc8,10.0.0.5:443
/f824965edace119738da671b1e3cba0e44be224d,10.0.0.5:443
/ba49bc86400cd3970ea82550b493ac21a5bf750c,10.0.0.6:443
/2dbc4c595e56af88a175811e715d908d04223c2b,10.0.0.1:443
/e322f439382552e76fb5672448c2fc6beb56e32f,10.0.0.8:443
/e86b62c9f240c97f1503be26f97fbd412b846e2d,10.0.0.3:443
/e15c58a856824298802fed4151234e2a4812697a,10.0.0.7:443
/e7b5cbc5701beadb3c8dc04361cdd1de8f8b0d1d,10.0.0.3:443
/bf317994d21c8731f4afdebb4d77e8eb997a962b,10.0.0.5:443
/cabb2f8ffcd78c654c2eb0bbecd32a4a5523c165,10.0.0.8:443
/17e54d3b00f5b989798846b7729789343df86d56,10.0.0.2:443
/334fcdca67212b01973f946729a2cb569adec510,10.0.0.3:443
/a5d301755197c85985ee48ec53b4c3a1b6bfd9fe,10.0.0.1:443
/f2b98e19ef44d81d16fce5e9e3b51c3be2a8c713,10.0.0.2:443
/6a69c385697c7ed6b85ced626d993b02216c2186,10.0.0.6:443
/7d269fcf2722875d9d75eb2e3a7e0fae6ae69a98,10.0.0.5:443
/126e39e079fa7f2b254146466e9ab79dc4347be6,10.0.0.6:443
/8a4aa1d8a6d6a2938ecf7a42814723e282a90af7,10.0.0.1:443
/b9d9fabb46dbe2db5f5e0164c98002114c96006e,10.0.0.2:443
/d50477758d2f8e99392ae9d51ecef57484c97c5c,10.0.0.1:443
/09ac262b5b37a5b19fa989e5ac75722fdb8e17fa,10.0.0.1:443
/e1a322b3fc40fbc296f9ed8dc86d6e91c6dc8edb,10.0.0.7:443
/1c08ea2a0c42f17d99acbcb2b0ffd33ba1fb34be,10.0.0.8:443
/6aeefd631b7ddc01936b7fc11f1e8aa5a892bec0,10.0.0.4:443
/f579e4e7a050029dd9f21a38104c1f1b65e201e5,10.0.0.3:443
/9764ed5aff70cac5a5859415251c0b78dfd37909,10.0.0.7:443
/42d5cfb85dba89806546ac382f61fd909cdd40e9,10.0.0.2:443
/4cf6e01e948d9cd30032b5ce77d2d8b2daaf21c8,10.0.0.2:443
/e0367d6d11e81aaed084aceac24b327abf053c0f,10.0.0.2:443
/28b88a8f6ce2d2b9bd5867f89555f2e43dc96c96,10.0.0.8:443
/56b207c041ede2daa065d56b03173663eb6a1f2b,10.0.0.9:443
/6aa844c78eb0fb44bd506a920360377163eb7150,10.0.0.9:443
/327dabc969eb616dc10e4ba33f45d0cc97bf79d9,10.0.0.4:443
/7352ac0a60c2f2bedb0d80b0249db5aa59e0c876,10.0.0.4:443
/70a2d83045053301afcd56deb30bf9095a9ad9d6,10.0.0.6:443
/cbd05a3e43fb4b1aa98283c8d460571c43144391,10.0.0.9:443
/0c903f04b7f6d4a59a97c4ae212a41ad579e5a1f,10.0.0.1:443
/3dc7749f4a1f30ab1da8b4e2722d3228b0e8b33d,10.0.0.7:443
/b502f7505431f9b895c9fe04de74250f27f96f7f,10.0.0.7:443
/14aa1d4b3ddf14d7f5cba9de1f08213a8a104131,10.0.0.9:443
/cce74a90b51c414ffbb28ad26794bf589c1eaf82,10.0.0.2:443
/a86f00b80c039be1e4fad0b5508bd1710ba08ae1,10.0.0.4:443
/055b3843846da3e34606dc209c9b30192e6119da,10.0.0.4:443
/f05c831872443fb183c6c19d1a9dbe45315f0205,10.0.0.4:443
/d974f0525fca48f3fae6d6cbd0b394e4324ad422,10.0.0.3:443
/00ef799f7c152055a4163c41bb9bcd37fa375f77,10.0.0.8:443
/7d876337b6f86b9873ccc49f7efaae91d180a06d,10.0.0.4:443
/93e9efe1dba728a9a51d68024eddf764903cac1f,10.0.0.3:443
/891d1f6116aec61df9dce652743ae159861e9160,10.0.0.5:443
/eec92486c171c84f0dcf3db001ce9b32615166f3,10.0.0.2:443
/363f23c9bea6cac01f85f7ec7142110363110c83,10.0.0.7:443
/51c699024eca637817d2bdad512b9c1d19fafec4,10.0.0.6:443
/b7aaa2cd36505f55bb047799649acadc5e20e499,10.0.0.8:443
/b17b2a0be4fa445a0f762a92dee8b8d83e992eae,10.0.0.8:443
/e4c7719fd87bd86bf5c818a7aa38af4d50691d51,10.0.0.6:443
/edf7c0e6f7c1bbdc7bc67912607a32ff5ea786fc,10.0.0.7:443
/62d1fb98dbf4825ae9db6f919233f32391ce1e63,10.0.0.8:443
/e0214b5cf9fb6464dfd37a84fd41a759e571a531,10.0.0.7:443
/bf7e606b9858081dd5f7f2346f70e314238809e3,10.0.0.9:443
/f4907d05ffb94d07ea2a1eb8dc187268cd386f0e,10.0.0.2:443
/cdfc8d53ae78926694e5d1ebe872d30f7e57b324,10.0.0.5:443
/0de8c668d5445e80ca5d0ece0e653fc664b494e8,10.0.0.3:443
/fafc8ecbe6fc726999a249b9a2bfcb52371dec5b,10.0.0.2:443
/ec395c49cff03d52796f59fabdabd045eac000c2,10.0.0.3:443
/0bfbcb5addb0f60e28504de36a10abb80d253011,10.0.0.3:443
/6bfbff0669887fbad3bc871771c62f9c5d723740,10.0.0.3:443
/d0b0e5ce39dbfd7a3b46542c1088e0231295be11,10.0.0.9:443
/79dc5f7b75fb82ed6dc0be947434d924ade41bc0,10.0.0.6:443
/f729ac665760539349f1c808bee81b70d58537d2,10.0.0.8:443
/fc97f5ede259d60ba9f28888ac3b2155b5490df6,10.0.0.1:443
/0c5cb1e0dfed034af7c893de52841686a762f6aa,10.0.0.2:443
/accc74f350aa74842630517d6abfed728d5da084,10.0.0.8:443
/1c2e760244036ee1f0539288d446830a0fa38457,10.0.0.7:443
/0f60c86359a38087105591e609f90b962b02b74d,10.0.0.4:443
/9e26cafc4562d086fadce9cdf56c049e9cfd93b8,10.0.0.5:443
/bce221dd780f255f84e159f1097ada3461ff5ab6,10.0.0.2:443
/c918871122a23989009073c879ff5c6d3add13c9,10.0.0.2:443
/50735adf2192b44d672b99cacf66e650582f9088,10.0.0.1:443
/fb763e843db495284a217b2ec5027d7698bd49ca,10.0.0.1:443
/ae068dab9d1784aa098085b9f62e168c79bb75cc,10.0.0.9:443
/6b989c5c0af97c2acd640cb2f05dc4b4fe7c9323,10.0.0.4:443
/fd81185d074101b22059f022b174d058b8d84a6e,10.0.0.8:443
/20277b4366206f795ed97f4631baa755636ddc1e,10.0.0.6:443
/c9199750bcb6c7b916958bee64f562547d6b1209,10.0.0.3:443
/9ea2e79813eb3a6d7b710affe1b15f7da29c1800,10.0.0.2:443
/2de1269fec587f989b911e4780eb056c310d2f38,10.0.0.3:443
/6869b9dc65fbfb56fbe828c0ac57da2bc7c0f460,10.0.0.6:443
/aab69be5e6c02f972e8a5e0e4c9716b84d91e667,10.0.0.6:443
/f8bb7d358e7c5e58b5d68a29a2d1ca4cb1a4af61,10.0.0.6:443
/441f8eab7ac90bb4022d35aca197b5765ecb7dbd,10.0.0.6:443
/36bad018c5b41a8f3731267f364cbe74cee32a81,10.0.0.4:443
/8d31a4ecc03b0d8680ccef099ed528f07a5e8aa0,10.0.0.8:443
/2e8ee9b09986f2e9435dc2b3b05d9353a8edfd3b,10.0.0.2:443
/950dedb075f2e3b2bc485659e0158fa0c26511ee,10.0.0.7:443
/2bff636db028f497c8d26cce0c138a5049e69db7,10.0.0.4:443
/33353d31c296876c1267ae7793d0c03caea2a387,10.0.0.6:443
/bf5e822a05f799f153c648ccad9987396c9350cd,10.0.0.8:443
/92a14cc304c5fce82d3e4999fb1a819c92cfe2fb,10.0.0.9:443
/06c1a1908f37b4cdacd7098b7304ced839f8412b,10.0.0.3:443
/de42c498d10e019fad348d4e2e512afd8981a7b9,10.0.0.3:443
/42fda236e32757240063ecb99681a30121629ab3,10.0.0.8:443
/b93d832939ca288351cb707d8332ff7aade5c6d0,10.0.0.1:443
/2fac7e24ec806b0b5b9f7bf99587e50357d9db04,10.0.0.9:443
/8750e04a953b8d6a5922ccd6172d59052671207a,10.0.0.5:443
/c5128cc73e48a0270a27e6e2b89f563dd756f987,10.0.0.6:443
/4f798881963c3638cd89168511eccdc8f6ecc62f,10.0.0.9:443
/330116fb139c1313bc78bf314a211d98dad8093e,10.0.0.1:443
/d2ff19c61e6be34733295d7c2f81b18c81a687bb,10.0.0.5:443
/5ff80f927eb571aa8cc006e0b7c237d8f571ffce,10.0.0.6:443
/b2ec40a643fd05a9f0d52b180893c9837a824060,10.0.0.8:443
/9445a32fc9ea94e27622826699123272ca5bb38a,10.0.0.8:443
/7b12433171a2a61d85b95d8ffd9ddd7e8ba1f0d2,10.0.0.5:443
/8f1cdae5a092253771e8d07f66ef45d6cc540c5e,10.0.0.6:443
/7b70d96ff0b14c53aaf134e16cb0ac8394b9ec93,10.0.0.7:443
/1c91cb4395964c049653b422be092ecc6957ccb1,10.0.0.3:443
/1680013d16baaadb8cffb6838dcaacfa9ddf815b,10.0.0.5:443
/a6cd614c3f4c92608f176ecbbd24d4d3e980c18e,10.0.0.5:443
/5f1ad23d97b5effe56b07770e1b1e3eb9302ce32,10.0.0.9:443
/4014ea7ccfafddaa137d536e55a4fcd04f4acd23,10.0.0.7:443
/4fe5c55f2b57b57de6773d70b788f2950f98aedb,10.0.0.4:443
/a5e42b24f4d2b631529dc5e693f0f44bec25d917,10.0.0.4:443
/605b3c16dde75dde0ed96aa420121d491043b868,10.0.0.5:443
/a44dfbc27a6071c7fc05ec6c9bcf08a80059d9fa,10.0.0.1:443
/376db7678f97b6614c81a967ac480d2af1ec0494,10.0.0.9:443
/7ab905b17ac0f3dabc9e73654f7108a60382b4bc,10.0.0.9:443
/7e15729b3688eac3a344b412fb66660812e51f5a,10.0.0.9:443
/fbe2e0d0d220635295397028926abcab95eae6f3,10.0.0.9:443
/68c054186128694496e7e34307333f0ee0fcdd83,10.0.0.5:443
/1539ed321544c684ffec1d533551c8ca3b814bed,10.0.0.2:443
/52239e8766c814c60a588320ff4e55dc9fed5345,10.0.0.9:443
/7dbfc596b9cf0c5fada4c6521d67d7e8d42f6fea,10.0.0.2:443
/d04621fb5f44305c3532899899c6f53b6afd5a0e,10.0.0.9:443
/581c7d2014ef9aa2a30e54ac748164ba66a534d1,10.0.0.8:443
/d6c7154f9d5d3dadd5e890c8f7a1b178c4992eea,10.0.0.2:443
/bbf232fd8d3a7f781b378f160e3fb629ce09ad59,10.0.0.5:443
/5290410ff6812f651bac427d10fae8ec7a40af1f,10.0.0.9:443
/41cfbbdf14643164ccb783e93eb123f23f2f32bc,10.0.0.8:443
/9259ff15fbb6d028166e31434d4e2dc335c390c1,10.0.0.2:443
/20258376b3bf0e3221118d0b5af2c4b8fa99fef6,10.0.0.6:443
/7e34ebb4b42eafe0d92fb748745416bdcdb67bb6,10.0.0.2:443
/2fe398814da4f25bf0f9f6982b43d955b93c9c4b,10.0.0.1:443
/6bcc489d911d7e972b2decd5827d4eb628111a84,10.0.0.4:443
/87c70ef625901e6ba2c9dd6951efa9964bebe528,10.0.0.1:443
/3b79cc56f47bc4487a12e940caff6f4c294b633d,10.0.0.1:443
/bc5e3f9cfd2ca035467321a7280dd2bf1250259c,10.0.0.4:443
/260555bb77d941f8761b6a8b81267b6884da79d0,10.0.0.5:443
/e77e4992d06c6b662602f32bf8a52b1567eb2f48,10.0.0.6:443
/0923bd0601bf7c074d173268cbf94cff7b39045b,10.0.0.6:443
/d7d8f74a97ab8b8f0df34c447e2ba2bf19a8a142,10.0.0.2:443
/1151eda28cedf93b701b4ae0f47b35c488d493af,10.0.0.3:443
/e4b07856919718e9c7cd00154fbb12c343bf5b55,10.0.0.6:443
/d5751616b55b115bb0260fee64eef350e83cf080,10.0.0.1:443
/83bcc3183e99dcba798f17ac6340722b1431ed0d,10.0.0.5:443
/2e84b809ca1744c2abfe7d8ec18dc80adbddc0bc,10.0.0.2:443
/782631660d3fd29586a00922d9e1885c30615bbf,10.0.0.2:443
/0b43b69669114bc2ef38ad7e03fae7a82ac22f29,10.0.0.5:443
/59bb4679fc26333daec10a9a06ebe185ab252628,10.0.0.4:443
/26b7bd3eae9d48dae1677893dc7efb7d7d9386ac,10.0.0.6:443
/1842eded9ad6f8e951f4e788bbedd8ad9b5d2289,10.0.0.8:443
/c0b818cdd2005b05932a6369a80d47993cf1982b,10.0.0.9:443
/6f3b0ed59323086c2557c6ba6aeb433bd82509bb,10.0.0.1:443
/bbe9eabb406b05b3b13ce634caf88123d095de39,10.0.0.3:443
/fad30adc6c95657b9be1a490b338798fa08504c4,10.0.0.3:443
/56a66dd5081af7ed9d9307374606245a751cceb7,10.0.0.4:443
/356beada6689aaba95fafc526101a982acb007ae,10.0.0.9:443
/c9917bb9de0eea13e66c71c88c35a03f3567bae2,10.0.0.1:443
/495d1913174caaaae68eda040e329cf9ab6b807c,10.0.0.2:443
/f89a11728d36429756ea52ed934a853ae2cf62a5,10.0.0.2:443
/3263ba8199377065aafe566132716c2bf4fa300d,10.0.0.9:443
/cf44e7fe43a615a8b58cf5c3b7486e95acf95904,10.0.0.5:443
/2a055e911545b3eb8ca31202c16a916a061eb35c,10.0.0.7:443
/120600f104f1305c96642a7382c7074386dc9f08,10.0.0.3:443
/05690a98a3b475aed24b300bd1d8303c5f0ed93c,10.0.0.8:443
/648371cafaf32c90a7c48ec2c6d57f84d4f81346,10.0.0.1:443
/c346dd5aae927ff513fe5a8ece05ec7c2d5674d1,10.0.0.9:443
/531b748b3a7ba01687e3eb6bff2df8951daaf1f4,10.0.0.5:443
/c0935bb5ad0fad0b3f9ff4b924c01971116eac7a,10.0.0.7:443
/2bf19d2be50af6207c977fa502caa8a6dd6722da,10.0.0.8:443
/afc63845c577fb67edb01f2f4faa19415f36420d,10.0.0.2:443
/efaa738ada05c233a8f0d57b3064d6ccd86b1a14,10.0.0.5:443
/b1d708f56428e8c5e9ae6f8a2a492c75271b8a45,10.0.0.5:443
/5b9abc512a9b5ee9083d0feec592002f5f31dad3,10.0.0.7:443
/55289a755e9560c48aa164f357598a47603badbd,10.0.0.2:443
/472ecb69a8ca3b590d1dd3153928d7bb09f43799,10.0.0.4:443
/072d33b67d2137d961ff8ee5a5c0d024196e4c91,10.0.0.7:443
/ca285c1755e98e6a8080cbe45f54e91f7d466c2a,10.0.0.2:443
/13661ad2ef4c48f7e99825bce7efc3833d0bb1d4,10.0.0.3:443
/b093d7e4f7588296fb1b02b38f20effb4c193571,10.0.0.1:443
/dc82252b61b41f0760eb44be138b6bca8a22b804,10.0.0.9:443
/1cddee843b564a0ef0acb8ca9d7dff0d7df3dcfd,10.0.0.2:443
/560e227e16cb133b6fe840bf9ef03e8aee0dad66,10.0.0.8:443
/55b1bd74ae0aea31731589550b7610ed00c1e616,10.0.0.9:443
/87f0d8ea3eaa758b41763fd06d2fa52d2fcff7a7,10.0.0.7:443
/e669bc52f256862c9ae2e0a7748ee3786a2abcf8,10.0.0.9:443
/3fca3907c98117eb1920bd03179a35aee3cf1939,10.0.0.3:443
/3d2582fcdf65814ef32ceb3ca30a38aba02020d2,10.0.0.7:443
/2628fbf7ed1e6f3f2152a57e1b7c440895088534,10.0.0.6:443
/37f57ab1100409e62e4ca9532dd245a2e84eab7a,10.0.0.8:443
/1558c2abe0ba2f4d83172d32ee9489a5f2469174,10.0.0.2:443
/8e2dc4336eb4fb16deebee7c3157b2a4ddc040d8,10.0.0.5:443
/016a870bfd0d09832a50ca14d666e7ef21978f5a,10.0.0.3:443
/f3d2b5c0cad02f1507f1a5c21b4cd8704f32ac11,10.0.0.6:443
/1ded786d6cc703eda6a20bb7388114a6b74cb17b,10.0.0.1:443
/de0132cb6cdc9aeba721296519d0b64bdb752855,10.0.0.6:443
/30f11551b9d666c96604d01526eb001c9cd7524c,10.0.0.9:443
/eb8bc43a527ac9f8088010954c78fff7a75f1bca,10.0.0.9:443
/30803c02cdc19bccba32ad9fc2e77a4d20eac091,10.0.0.5:443
/7281a18bbbb2dfb089b3039f516792e6b0c67714,10.0.0.2:443
/8787a462e8890da32babea287e4874d3da54246e,10.0.0.6:443
/2b76b52f43b204a6353a1f999ff40a221602199b,10.0.0.3:443
/56e71d678f5482e4dfe45ae83993e9e1c9fe2731,10.0.0.1:443
/08d820998361440ed076fa803e05d3c983383920,10.0.0.2:443
/cc449cf6cee8ce83f646097ac3d3287becacfe41,10.0.0.4:443
/2da713a30a3333b7895b3d65a320fe6c20bf670a,10.0.0.8:443
/658e0cefd2b9a97b2dd840c2a243afc0dbf06cf1,10.0.0.9:443

================================================
FILE: pingora-ketama/test-data/trace.sh
================================================
#!/bin/bash
set -eu
for i in {0..1000}; do
    URI=$(openssl rand -hex 20)
    curl http://localhost:8080/$URI -so /dev/null || true
done

================================================
FILE: pingora-ketama/tests/backwards_compat.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use old_version::{Bucket as OldBucket, Continuum as OldContinuum};
#[allow(unused_imports)]
use pingora_ketama::{Bucket, Continuum, Version, DEFAULT_POINT_MULTIPLE};
use rand::{random, random_range, rng, seq::IteratorRandom};
use std::collections::BTreeSet;
use std::net::{Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6};

mod old_version;

fn random_socket_addr() -> SocketAddr {
    if random::<bool>() {
        SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::from_bits(random()), random()))
    } else {
        SocketAddr::V6(SocketAddrV6::new(
            Ipv6Addr::from_bits(random()),
            random(),
            0,
            0,
        ))
    }
}

fn random_string(len: usize) -> String {
    const CHARS: &str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    let mut rng = rng();
    (0..len)
        .map(|_| CHARS.chars().choose(&mut rng).unwrap())
        .collect()
}

/// The old version of pingora-ketama should _always_ return the same result as
/// v1 of the new version as long as the original input is sorted by by socket
/// address (and has no duplicates). this test generates a large number of
/// random socket addresses with varying weights and compares the output of
/// both
#[test]
fn test_v1_to_old_version() {
    let (old_buckets, new_buckets): (BTreeSet<_>, BTreeSet<_>) = (0..2000)
        .map(|_| (random_socket_addr(), random_range(1..10)))
        .map(|(addr, weight)| (OldBucket::new(addr, weight), Bucket::new(addr, weight)))
        .unzip();

    let old_continuum = OldContinuum::new(&Vec::from_iter(old_buckets));
    let new_continuum = Continuum::new(&Vec::from_iter(new_buckets));

    for _ in 0..20_000 {
        let key = random_string(20);
        let old_node = old_continuum.node(key.as_bytes()).unwrap();
        let new_node = new_continuum.node(key.as_bytes()).unwrap();

        assert_eq!(old_node, new_node);
    }
}

/// The new version of pingora-ketama (v2) should return _almost_ exactly what
/// the old version does. The difference will be in collision handling
#[test]
#[cfg(feature = "v2")]
fn test_v2_to_old_version() {
    let (old_buckets, new_buckets): (BTreeSet<_>, BTreeSet<_>) = (0..2000)
        .map(|_| (random_socket_addr(), random_range(1..10)))
        .map(|(addr, weight)| (OldBucket::new(addr, weight), Bucket::new(addr, weight)))
        .unzip();

    let old_continuum = OldContinuum::new(&Vec::from_iter(old_buckets));

    let new_continuum = Continuum::new_with_version(
        &Vec::from_iter(new_buckets),
        Version::V2 {
            point_multiple: DEFAULT_POINT_MULTIPLE,
        },
    );

    let test_count = 20_000;
    let mut mismatches = 0;

    for _ in 0..test_count {
        let key = random_string(20);
        let old_node = old_continuum.node(key.as_bytes()).unwrap();
        let new_node = new_continuum.node(key.as_bytes()).unwrap();

        if old_node != new_node {
            mismatches += 1;
        }
    }

    assert!((mismatches as f64 / test_count as f64) < 0.001);
}


================================================
FILE: pingora-ketama/tests/old_version/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This mod is a direct copy of the old version of pingora-ketama. It is here
//! to ensure that the new version's compatible mode is produces identical
//! results as the old version.

use std::cmp::Ordering;
use std::io::Write;
use std::net::SocketAddr;

use crc32fast::Hasher;

/// A [Bucket] represents a server for consistent hashing
///
/// A [Bucket] contains a [SocketAddr] to the server and a weight associated with it.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord)]
pub struct Bucket {
    // The node name.
    // TODO: UDS
    node: SocketAddr,

    // The weight associated with a node. A higher weight indicates that this node should
    // receive more requests.
    weight: u32,
}

impl Bucket {
    /// Return a new bucket with the given node and weight.
    ///
    /// The chance that a [Bucket] is selected is proportional to the relative weight of all [Bucket]s.
    ///
    /// # Panics
    ///
    /// This will panic if the weight is zero.
    pub fn new(node: SocketAddr, weight: u32) -> Self {
        assert!(weight != 0, "weight must be at least one");

        Bucket { node, weight }
    }
}

// A point on the continuum.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Point {
    // the index to the actual address
    node: u32,
    hash: u32,
}

// We only want to compare the hash when sorting, so we implement these traits by hand.
impl Ord for Point {
    fn cmp(&self, other: &Self) -> Ordering {
        self.hash.cmp(&other.hash)
    }
}

impl PartialOrd for Point {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Point {
    fn new(node: u32, hash: u32) -> Self {
        Point { node, hash }
    }
}

/// The consistent hashing ring
///
/// A [Continuum] represents a ring of buckets where a node is associated with various points on
/// the ring.
pub struct Continuum {
    ring: Box<[Point]>,
    addrs: Box<[SocketAddr]>,
}

impl Continuum {
    /// Create a new [Continuum] with the given list of buckets.
    pub fn new(buckets: &[Bucket]) -> Self {
        // This constant is copied from nginx. It will create 160 points per weight unit. For
        // example, a weight of 2 will create 320 points on the ring.
        const POINT_MULTIPLE: u32 = 160;

        if buckets.is_empty() {
            return Continuum {
                ring: Box::new([]),
                addrs: Box::new([]),
            };
        }

        // The total weight is multiplied by the factor of points to create many points per node.
        let total_weight: u32 = buckets.iter().fold(0, |sum, b| sum + b.weight);
        let mut ring = Vec::with_capacity((total_weight * POINT_MULTIPLE) as usize);
        let mut addrs = Vec::with_capacity(buckets.len());

        for bucket in buckets {
            let mut hasher = Hasher::new();

            // We only do the following for backwards compatibility with nginx/memcache:
            // - Convert SocketAddr to string
            // - The hash input is as follows "HOST EMPTY PORT PREVIOUS_HASH". Spaces are only added
            //   for readability.
            // TODO: remove this logic and hash the literal SocketAddr once we no longer
            // need backwards compatibility

            // with_capacity = max_len(ipv6)(39) + len(null)(1) + max_len(port)(5)
            let mut hash_bytes = Vec::with_capacity(39 + 1 + 5);
            write!(&mut hash_bytes, "{}", bucket.node.ip()).unwrap();
            write!(&mut hash_bytes, "\0").unwrap();
            write!(&mut hash_bytes, "{}", bucket.node.port()).unwrap();
            hasher.update(hash_bytes.as_ref());

            // A higher weight will add more points for this node.
            let num_points = bucket.weight * POINT_MULTIPLE;

            // This is appended to the crc32 hash for each point.
            let mut prev_hash: u32 = 0;
            addrs.push(bucket.node);
            let node = addrs.len() - 1;
            for _ in 0..num_points {
                let mut hasher = hasher.clone();
                hasher.update(&prev_hash.to_le_bytes());

                let hash = hasher.finalize();
                ring.push(Point::new(node as u32, hash));
                prev_hash = hash;
            }
        }

        // Sort and remove any duplicates.
        ring.sort_unstable();
        ring.dedup_by(|a, b| a.hash == b.hash);

        Continuum {
            ring: ring.into_boxed_slice(),
            addrs: addrs.into_boxed_slice(),
        }
    }

    /// Find the associated index for the given input.
    pub fn node_idx(&self, input: &[u8]) -> usize {
        let hash = crc32fast::hash(input);

        // The `Result` returned here is either a match or the error variant returns where the
        // value would be inserted.
        match self.ring.binary_search_by(|p| p.hash.cmp(&hash)) {
            Ok(i) => i,
            Err(i) => {
                // We wrap around to the front if this value would be inserted at the end.
                if i == self.ring.len() {
                    0
                } else {
                    i
                }
            }
        }
    }

    /// Hash the given `hash_key` to the server address.
    pub fn node(&self, hash_key: &[u8]) -> Option<SocketAddr> {
        self.ring
            .get(self.node_idx(hash_key)) // should we unwrap here?
            .map(|p| self.addrs[p.node as usize])
    }
}


================================================
FILE: pingora-limits/Cargo.toml
================================================
[package]
name = "pingora-limits"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
description = "A library for rate limiting and event frequency estimation"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["algorithms"]
keywords = ["rate-limit", "pingora"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_limits"
path = "src/lib.rs"

[dependencies]
ahash = { workspace = true }

[dev-dependencies]
rand = "0.8"
dashmap = "5"
dhat = "0"
float-cmp = "0.9.0"

[[bench]]
name = "benchmark"
harness = false

[features]
dhat-heap = [] # for benchmark only


================================================
FILE: pingora-limits/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-limits/benches/benchmark.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(feature = "dhat-heap")]
#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

use ahash::RandomState;
use dashmap::DashMap;
use pingora_limits::estimator::Estimator;
use rand::distributions::Uniform;
use rand::{thread_rng, Rng};
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::sync::Mutex;
use std::thread;
use std::time::Instant;

trait Counter {
    fn incr(&self, key: u32, value: usize);
    fn name() -> &'static str;
}

#[derive(Default)]
struct NaiveCounter(Mutex<HashMap<u32, usize>>);
impl Counter for NaiveCounter {
    fn incr(&self, key: u32, value: usize) {
        let mut map = self.0.lock().unwrap();
        if let Some(v) = map.get_mut(&key) {
            *v += value;
        } else {
            map.insert(key, value);
        }
    }

    fn name() -> &'static str {
        "Naive Counter"
    }
}

#[derive(Default)]
struct OptimizedCounter(DashMap<u32, AtomicUsize, RandomState>);
impl Counter for OptimizedCounter {
    fn incr(&self, key: u32, value: usize) {
        if let Some(v) = self.0.get(&key) {
            v.fetch_add(value, Ordering::Relaxed);
            return;
        }
        self.0.insert(key, AtomicUsize::new(value));
    }

    fn name() -> &'static str {
        "Optimized Counter"
    }
}

impl Counter for Estimator {
    fn incr(&self, key: u32, value: usize) {
        self.incr(key, value as isize);
    }

    fn name() -> &'static str {
        "Pingora Estimator"
    }
}

fn run_bench<T: Counter>(
    counter: &T,
    samples: usize,
    distribution: &Uniform<u32>,
    test_name: &str,
) {
    let mut rng = thread_rng();
    let before = Instant::now();
    for _ in 0..samples {
        let event: u32 = rng.sample(distribution);
        counter.incr(event, 1);
    }
    let elapsed = before.elapsed();
    println!(
        "{} {test_name} {:?} total, {:?} avg per operation",
        T::name(),
        elapsed,
        elapsed / samples as u32
    );
}

fn run_threaded_bench<T: Counter + Send + Sync + 'static>(
    threads: usize,
    counter: Arc<T>,
    samples: usize,
    distribution: &Uniform<u32>,
) {
    let mut handlers = vec![];
    for i in 0..threads {
        let est = counter.clone();
        let dist = *distribution;
        let handler = thread::spawn(move || {
            run_bench(est.as_ref(), samples, &dist, &format!("thread#{i}"));
        });
        handlers.push(handler);
    }
    for thread in handlers {
        thread.join().unwrap();
    }
}

/*
Pingora Estimator single thread 1.042849543s total, 10ns avg per operation
Naive Counter single thread 5.12641496s total, 51ns avg per operation
Optimized Counter single thread 4.302553352s total, 43ns avg per operation
Pingora Estimator thread#7 2.654667606s total, 212ns avg per operation
Pingora Estimator thread#2 2.65651993s total, 212ns avg per operation
Pingora Estimator thread#4 2.658225266s total, 212ns avg per operation
Pingora Estimator thread#0 2.660603361s total, 212ns avg per operation
Pingora Estimator thread#1 2.66139014s total, 212ns avg per operation
Pingora Estimator thread#6 2.663498849s total, 213ns avg per operation
Pingora Estimator thread#5 2.663344276s total, 213ns avg per operation
Pingora Estimator thread#3 2.664652951s total, 213ns avg per operation
Naive Counter thread#7 18.795881242s total, 1.503µs avg per operation
Naive Counter thread#1 18.805652672s total, 1.504µs avg per operation
Naive Counter thread#6 18.818084416s total, 1.505µs avg per operation
Naive Counter thread#4 18.832778982s total, 1.506µs avg per operation
Naive Counter thread#3 18.833952715s total, 1.506µs avg per operation
Naive Counter thread#2 18.837975133s total, 1.507µs avg per operation
Naive Counter thread#0 18.8397464s total, 1.507µs avg per operation
Naive Counter thread#5 18.842616299s total, 1.507µs avg per operation
Optimized Counter thread#4 2.650860314s total, 212ns avg per operation
Optimized Counter thread#0 2.651867013s total, 212ns avg per operation
Optimized Counter thread#2 2.656473381s total, 212ns avg per operation
Optimized Counter thread#5 2.657715876s total, 212ns avg per operation
Optimized Counter thread#1 2.658275111s total, 212ns avg per operation
Optimized Counter thread#7 2.658770751s total, 212ns avg per operation
Optimized Counter thread#6 2.659831251s total, 212ns avg per operation
Optimized Counter thread#3 2.664375398s total, 213ns avg per operation
*/

/* cargo bench --features dhat-heap for memory info

Pingora Estimator single thread 1.066846098s total, 10ns avg per operation
dhat: Total:     26,184 bytes in 9 blocks
dhat: At t-gmax: 26,184 bytes in 9 blocks
dhat: At t-end:  1,464 bytes in 5 blocks
dhat: The data has been saved to dhat-heap.json, and is viewable with dhat/dh_view.html
Naive Counter single thread 5.429089242s total, 54ns avg per operation
dhat: Total:     71,303,260 bytes in 20 blocks
dhat: At t-gmax: 53,477,392 bytes in 2 blocks
dhat: At t-end:  0 bytes in 0 blocks
dhat: The data has been saved to dhat-heap.json, and is viewable with dhat/dh_view.html
Optimized Counter single thread 4.361720355s total, 43ns avg per operation
dhat: Total:     71,307,722 bytes in 491 blocks
dhat: At t-gmax: 36,211,208 bytes in 34 blocks
dhat: At t-end:  0 bytes in 0 blocks
dhat: The data has been saved to dhat-heap.json, and is viewable with dhat/dh_view.html
*/

fn main() {
    const SAMPLES: usize = 100_000_000;
    const THREADS: usize = 8;
    const ITEMS: u32 = 1_000_000;
    const SAMPLES_PER_THREAD: usize = SAMPLES / THREADS;
    let distribution = Uniform::new(0, ITEMS);

    // single thread
    {
        #[cfg(feature = "dhat-heap")]
        let _profiler = dhat::Profiler::new_heap();
        let pingora_est = Estimator::new(3, 1024);
        run_bench(&pingora_est, SAMPLES, &distribution, "single thread");
    }

    {
        #[cfg(feature = "dhat-heap")]
        let _profiler = dhat::Profiler::new_heap();
        let naive: NaiveCounter = Default::default();
        run_bench(&naive, SAMPLES, &distribution, "single thread");
    }

    {
        #[cfg(feature = "dhat-heap")]
        let _profiler = dhat::Profiler::new_heap();
        let optimized: OptimizedCounter = Default::default();
        run_bench(&optimized, SAMPLES, &distribution, "single thread");
    }

    // multithread
    let pingora_est = Arc::new(Estimator::new(3, 1024));
    run_threaded_bench(THREADS, pingora_est, SAMPLES_PER_THREAD, &distribution);

    let naive: Arc<NaiveCounter> = Default::default();
    run_threaded_bench(THREADS, naive, SAMPLES_PER_THREAD, &distribution);

    let optimized: Arc<OptimizedCounter> = Default::default();
    run_threaded_bench(THREADS, optimized, SAMPLES_PER_THREAD, &distribution);
}


================================================
FILE: pingora-limits/src/estimator.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The estimator module contains a Count-Min Sketch type to help estimate the frequency of an item.

use crate::hash;
use crate::RandomState;
use std::hash::Hash;
use std::sync::atomic::{AtomicIsize, Ordering};

/// An implementation of a lock-free count–min sketch estimator. See the [wikipedia] page for more
/// information.
///
/// [wikipedia]: https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch
pub struct Estimator {
    estimator: Box<[(Box<[AtomicIsize]>, RandomState)]>,
}

impl Estimator {
    /// Create a new `Estimator` with the given amount of hashes and columns (slots).
    pub fn new(hashes: usize, slots: usize) -> Self {
        Self {
            estimator: (0..hashes)
                .map(|_| (0..slots).map(|_| AtomicIsize::new(0)).collect::<Vec<_>>())
                .map(|slot| (slot.into_boxed_slice(), RandomState::new()))
                .collect::<Vec<_>>()
                .into_boxed_slice(),
        }
    }

    /// Increment `key` by the value given. Return the new estimated value as a result.
    /// Note: overflow can happen. When some of the internal counters overflow, a negative number
    /// will be returned. It is up to the caller to catch and handle this case.
    pub fn incr<T: Hash>(&self, key: T, value: isize) -> isize {
        self.estimator
            .iter()
            .fold(isize::MAX, |min, (slot, hasher)| {
                let hash = hash(&key, hasher) as usize;
                let counter = &slot[hash % slot.len()];
                // Overflow is allowed for simplicity
                let current = counter.fetch_add(value, Ordering::Relaxed);
                std::cmp::min(min, current + value)
            })
    }

    /// Decrement `key` by the value given.
    pub fn decr<T: Hash>(&self, key: T, value: isize) {
        for (slot, hasher) in self.estimator.iter() {
            let hash = hash(&key, hasher) as usize;
            let counter = &slot[hash % slot.len()];
            counter.fetch_sub(value, Ordering::Relaxed);
        }
    }

    /// Get the estimated frequency of `key`.
    pub fn get<T: Hash>(&self, key: T) -> isize {
        self.estimator
            .iter()
            .fold(isize::MAX, |min, (slot, hasher)| {
                let hash = hash(&key, hasher) as usize;
                let counter = &slot[hash % slot.len()];
                let current = counter.load(Ordering::Relaxed);
                std::cmp::min(min, current)
            })
    }

    /// Reset all values inside this `Estimator`.
    pub fn reset(&self) {
        self.estimator.iter().for_each(|(slot, _)| {
            slot.iter()
                .for_each(|counter| counter.store(0, Ordering::Relaxed))
        });
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn incr() {
        let est = Estimator::new(8, 8);
        let v = est.incr("a", 1);
        assert_eq!(v, 1);
        let v = est.incr("b", 1);
        assert_eq!(v, 1);
        let v = est.incr("a", 2);
        assert_eq!(v, 3);
        let v = est.incr("b", 2);
        assert_eq!(v, 3);
    }

    #[test]
    fn desc() {
        let est = Estimator::new(8, 8);
        est.incr("a", 3);
        est.incr("b", 3);
        est.decr("a", 1);
        est.decr("b", 1);
        assert_eq!(est.get("a"), 2);
        assert_eq!(est.get("b"), 2);
    }

    #[test]
    fn get() {
        let est = Estimator::new(8, 8);
        est.incr("a", 1);
        est.incr("a", 2);
        est.incr("b", 1);
        est.incr("b", 2);
        assert_eq!(est.get("a"), 3);
        assert_eq!(est.get("b"), 3);
    }

    #[test]
    fn reset() {
        let est = Estimator::new(8, 8);
        est.incr("a", 1);
        est.incr("a", 2);
        est.incr("b", 1);
        est.incr("b", 2);
        est.decr("b", 1);
        est.reset();
        assert_eq!(est.get("a"), 0);
        assert_eq!(est.get("b"), 0);
    }
}


================================================
FILE: pingora-limits/src/inflight.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The inflight module defines the [Inflight] type which estimates the count of events occurring
//! at any point in time.

use crate::estimator::Estimator;
use crate::{hash, RandomState};
use std::hash::Hash;
use std::sync::Arc;

/// An `Inflight` type tracks the frequency of actions that are actively occurring. When the value
/// is dropped from scope, the count will automatically decrease.
pub struct Inflight {
    estimator: Arc<Estimator>,
    hasher: RandomState,
}

// fixed parameters for simplicity: hashes: h, slots: n
// Time complexity for a lookup operation is O(h). Space complexity is O(h*n)
// False positive ratio is 1/(n^h)
// We choose a small h and a large n to keep lookup cheap and FP ratio low
const HASHES: usize = 4;
const SLOTS: usize = 8192;

impl Inflight {
    /// Create a new `Inflight`.
    pub fn new() -> Self {
        Inflight {
            estimator: Arc::new(Estimator::new(HASHES, SLOTS)),
            hasher: RandomState::new(),
        }
    }

    /// Increment `key` by the value given. The return value is a tuple of a [Guard] and the
    /// estimated count.
    pub fn incr<T: Hash>(&self, key: T, value: isize) -> (Guard, isize) {
        let guard = Guard {
            estimator: self.estimator.clone(),
            id: hash(key, &self.hasher),
            value,
        };
        let estimation = guard.incr();
        (guard, estimation)
    }
}

/// A `Guard` is returned when an `Inflight` key is incremented via [Inflight::incr].
pub struct Guard {
    estimator: Arc<Estimator>,
    // store the hash instead of the actual key to save space
    id: u64,
    value: isize,
}

impl Guard {
    /// Increment the key's value that the `Guard` was created from.
    pub fn incr(&self) -> isize {
        self.estimator.incr(self.id, self.value)
    }

    /// Get the estimated count of the key that the `Guard` was created from.
    pub fn get(&self) -> isize {
        self.estimator.get(self.id)
    }
}

impl Drop for Guard {
    fn drop(&mut self) {
        self.estimator.decr(self.id, self.value)
    }
}

impl std::fmt::Debug for Guard {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Guard")
            .field("id", &self.id)
            .field("value", &self.value)
            // no need to dump shared estimator
            .finish()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn inflight_count() {
        let inflight = Inflight::new();
        let (g1, v) = inflight.incr("a", 1);
        assert_eq!(v, 1);
        let (g2, v) = inflight.incr("a", 2);
        assert_eq!(v, 3);

        drop(g1);

        assert_eq!(g2.get(), 2);

        drop(g2);

        let (_, v) = inflight.incr("a", 1);
        assert_eq!(v, 1);
    }
}


================================================
FILE: pingora-limits/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The pingora_limits crate contains modules that can help introduce things like rate limiting or
//! thread-safe event count estimation.

#![warn(clippy::all)]
#![allow(clippy::new_without_default)]
#![allow(clippy::type_complexity)]

pub mod estimator;
pub mod inflight;
pub mod rate;

use ahash::RandomState;
use std::hash::Hash;

#[inline]
fn hash<T: Hash>(key: T, hasher: &RandomState) -> u64 {
    hasher.hash_one(key)
}


================================================
FILE: pingora-limits/src/rate.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The rate module defines the [Rate] type that helps estimate the occurrence of events over a
//! period of time.

use crate::estimator::Estimator;
use std::hash::Hash;
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::time::{Duration, Instant};

/// Input struct to custom functions for calculating rate. Includes the counts
/// from the current interval, previous interval, the configured duration of an
/// interval, and the fraction into the current interval that the sample was
/// taken.
///
/// Ex. If the interval to the Rate instance is `10s`, and the rate calculation
/// is taken at 2 seconds after the start of the current interval, then the
/// fraction of the current interval returned in this struct will be `0.2`
/// meaning 20% of the current interval has elapsed
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd)]
pub struct RateComponents {
    pub prev_samples: isize,
    pub curr_samples: isize,
    pub interval: Duration,
    pub current_interval_fraction: f64,
}

/// A rate calculation function which uses a good estimate of the rate of events over the past
/// `interval` time.
///
/// Specifically, it linearly interpolates between the event counts of the previous and current
/// periods based on how far into the current period we are, as described in this post:
/// <https://blog.cloudflare.com/counting-things-a-lot-of-different-things/>
#[allow(dead_code)]
pub static PROPORTIONAL_RATE_ESTIMATE_CALC_FN: fn(RateComponents) -> f64 =
    |rate_info: RateComponents| {
        let prev = rate_info.prev_samples as f64;
        let curr = rate_info.curr_samples as f64;
        let interval_secs = rate_info.interval.as_secs_f64();
        let interval_fraction = rate_info.current_interval_fraction;

        let weighted_count = prev * (1. - interval_fraction) + curr;
        weighted_count / interval_secs
    };

/// A stable rate estimator that reports the rate of events per period of `interval` time.
///
/// It counts events for periods of `interval` and returns the average rate of the latest completed
/// period while counting events for the current (partial) period.
pub struct Rate {
    // 2 slots so that we use one to collect the current events and the other to report rate
    red_slot: Estimator,
    blue_slot: Estimator,
    red_or_blue: AtomicBool, // true: the current slot is red, otherwise blue
    start: Instant,
    // Use u64 below instead of Instant because we want atomic operation
    reset_interval_ms: u64, // the time interval to reset `current` and move it to `previous`
    last_reset_time: AtomicU64, // the timestamp in ms since `start`
    interval: Duration,
}

// see inflight module for the meaning for these numbers
const HASHES: usize = 4;
const SLOTS: usize = 1024; // This value can be lower if interval is short (key cardinality is low)

impl Rate {
    /// Create a new `Rate` with the given interval.
    pub fn new(interval: std::time::Duration) -> Self {
        Rate::new_with_estimator_config(interval, HASHES, SLOTS)
    }

    /// Create a new `Rate` with the given interval and Estimator config with the given amount of hashes and columns (slots).
    #[inline]
    pub fn new_with_estimator_config(
        interval: std::time::Duration,
        hashes: usize,
        slots: usize,
    ) -> Self {
        Rate {
            red_slot: Estimator::new(hashes, slots),
            blue_slot: Estimator::new(hashes, slots),
            red_or_blue: AtomicBool::new(true),
            start: Instant::now(),
            reset_interval_ms: interval.as_millis() as u64, // should be small not to overflow
            last_reset_time: AtomicU64::new(0),
            interval,
        }
    }

    fn current(&self, red_or_blue: bool) -> &Estimator {
        if red_or_blue {
            &self.red_slot
        } else {
            &self.blue_slot
        }
    }

    fn previous(&self, red_or_blue: bool) -> &Estimator {
        if red_or_blue {
            &self.blue_slot
        } else {
            &self.red_slot
        }
    }

    fn red_or_blue(&self) -> bool {
        self.red_or_blue.load(Ordering::SeqCst)
    }

    /// Return the per second rate estimation.
    ///
    /// This is the average rate of the latest completed period of length `interval`.
    pub fn rate<T: Hash>(&self, key: &T) -> f64 {
        let past_ms = self.maybe_reset();
        if past_ms >= self.reset_interval_ms * 2 {
            // already missed 2 intervals, no data, just report 0 as a short cut
            return 0f64;
        }

        self.previous(self.red_or_blue()).get(key) as f64 * 1000.0 / self.reset_interval_ms as f64
    }

    /// Report new events and return number of events seen so far in the current interval.
    pub fn observe<T: Hash>(&self, key: &T, events: isize) -> isize {
        self.maybe_reset();
        self.current(self.red_or_blue()).incr(key, events)
    }

    // reset if needed, return the time since last reset for other fn to use
    fn maybe_reset(&self) -> u64 {
        // should be short enough not to overflow
        let now = Instant::now().duration_since(self.start).as_millis() as u64;
        let last_reset = self.last_reset_time.load(Ordering::SeqCst);
        let past_ms = now - last_reset;

        if past_ms < self.reset_interval_ms {
            // no need to reset
            return past_ms;
        }
        let red_or_blue = self.red_or_blue();
        match self.last_reset_time.compare_exchange(
            last_reset,
            now,
            Ordering::SeqCst,
            Ordering::Acquire,
        ) {
            Ok(_) => {
                // first clear the previous slot
                self.previous(red_or_blue).reset();
                // then flip the flag to tell others to use the reset slot
                self.red_or_blue.store(!red_or_blue, Ordering::SeqCst);
                // if current time is beyond 2 intervals, the data stored in the previous slot
                // is also stale, we should clear that too
                if now - last_reset >= self.reset_interval_ms * 2 {
                    // Note that this is the previous one now because we just flipped self.red_or_blue
                    self.current(red_or_blue).reset();
                }
            }
            Err(new) => {
                // another thread beats us to it
                assert!(new >= now - 1000); // double check that the new timestamp looks right
            }
        }

        past_ms
    }

    /// Get the current rate as calculated with the given closure. This closure
    /// will take an argument containing all the accessible information about
    /// the rate from this object and allow the caller to make their own
    /// estimation of rate based on:
    ///
    /// 1. The accumulated samples in the current interval (in progress)
    /// 2. The accumulated samples in the previous interval (completed)
    /// 3. The size of the interval
    /// 4. Elapsed fraction of current interval for this sample (0..1)
    ///
    pub fn rate_with<F, T, K>(&self, key: &K, mut rate_calc_fn: F) -> T
    where
        F: FnMut(RateComponents) -> T,
        K: Hash,
    {
        let past_ms = self.maybe_reset();

        let (prev_samples, curr_samples) = if past_ms >= self.reset_interval_ms * 2 {
            // already missed 2 intervals, no data, just report 0 as a short cut
            (0, 0)
        } else if past_ms >= self.reset_interval_ms {
            (self.previous(self.red_or_blue()).get(key), 0)
        } else {
            let (prev_est, curr_est) = if self.red_or_blue() {
                (&self.blue_slot, &self.red_slot)
            } else {
                (&self.red_slot, &self.blue_slot)
            };

            (prev_est.get(key), curr_est.get(key))
        };

        rate_calc_fn(RateComponents {
            interval: self.interval,
            prev_samples,
            curr_samples,
            current_interval_fraction: (past_ms % self.reset_interval_ms) as f64
                / self.reset_interval_ms as f64,
        })
    }
}

#[cfg(test)]
mod tests {
    use float_cmp::assert_approx_eq;

    use super::*;
    use std::thread::sleep;
    use std::time::Duration;

    #[test]
    fn test_observe_rate() {
        let r = Rate::new(Duration::from_secs(1));
        let key = 1;

        // second: 0
        let observed = r.observe(&key, 3);
        assert_eq!(observed, 3);
        let observed = r.observe(&key, 2);
        assert_eq!(observed, 5);
        assert_eq!(r.rate(&key), 0f64); // no estimation yet because the interval has not passed

        // second: 1
        sleep(Duration::from_secs(1));
        let observed = r.observe(&key, 4);
        assert_eq!(observed, 4);
        assert_eq!(r.rate(&key), 5f64); // 5 rps

        // second: 2
        sleep(Duration::from_secs(1));
        assert_eq!(r.rate(&key), 4f64);

        // second: 3
        sleep(Duration::from_secs(1));
        assert_eq!(r.rate(&key), 0f64); // no event observed in the past 2 seconds
    }

    /// Assertion that 2 numbers are close within a generous margin. These
    /// tests are doing a lot of literal sleeping, so the measured results
    /// can't be accurate or consistent. This function does an assert with a
    /// generous tolerance
    fn assert_eq_ish(left: f64, right: f64) {
        assert_approx_eq!(f64, left, right, epsilon = 0.15)
    }

    #[test]
    fn test_observe_rate_custom_90_10() {
        let r = Rate::new(Duration::from_secs(1));
        let key = 1;

        let rate_90_10_fn = |rate_info: RateComponents| {
            let prev = rate_info.prev_samples as f64;
            let curr = rate_info.curr_samples as f64;
            (prev * 0.1 + curr * 0.9) / rate_info.interval.as_secs_f64()
        };

        // second: 0
        let observed = r.observe(&key, 3);
        assert_eq!(observed, 3);
        let observed = r.observe(&key, 2);
        assert_eq!(observed, 5);
        assert_eq!(r.rate_with(&key, rate_90_10_fn), 5. * 0.9);

        // second: 1
        sleep(Duration::from_secs(1));
        let observed = r.observe(&key, 4);
        assert_eq!(observed, 4);
        assert_eq!(r.rate_with(&key, rate_90_10_fn), 5. * 0.1 + 4. * 0.9);

        // second: 2
        sleep(Duration::from_secs(1));
        assert_eq!(r.rate_with(&key, rate_90_10_fn), 4. * 0.1);

        // second: 3
        sleep(Duration::from_secs(1));
        assert_eq!(r.rate_with(&key, rate_90_10_fn), 0f64);
    }

    #[test]
    fn test_observe_rate_custom_proportional() {
        let r = Rate::new(Duration::from_secs(1));
        let key = 1;

        // second: 0
        let observed = r.observe(&key, 3);
        assert_eq!(observed, 3);
        let observed = r.observe(&key, 2);
        assert_eq!(observed, 5);
        assert_eq_ish(r.rate_with(&key, PROPORTIONAL_RATE_ESTIMATE_CALC_FN), 5.);

        // second 0.5
        sleep(Duration::from_secs_f64(0.5));
        assert_eq_ish(r.rate_with(&key, PROPORTIONAL_RATE_ESTIMATE_CALC_FN), 5.);
        // rate() just looks at the previous interval, ignores current interval
        assert_eq_ish(r.rate(&key), 0.);

        // second: 1
        sleep(Duration::from_secs_f64(0.5));
        let observed = r.observe(&key, 4);
        assert_eq!(observed, 4);
        assert_eq_ish(r.rate_with(&key, PROPORTIONAL_RATE_ESTIMATE_CALC_FN), 9.);

        // second 1.75
        sleep(Duration::from_secs_f64(0.75));
        assert_eq_ish(
            r.rate_with(&key, PROPORTIONAL_RATE_ESTIMATE_CALC_FN),
            5. * 0.25 + 4.,
        );

        // second: 2
        sleep(Duration::from_secs_f64(0.25));
        assert_eq_ish(r.rate_with(&key, PROPORTIONAL_RATE_ESTIMATE_CALC_FN), 4.);
        assert_eq_ish(r.rate(&key), 4.);

        // second: 2.5
        sleep(Duration::from_secs_f64(0.5));
        assert_eq_ish(
            r.rate_with(&key, PROPORTIONAL_RATE_ESTIMATE_CALC_FN),
            4. / 2.,
        );
        assert_eq_ish(r.rate(&key), 4.);

        // second: 3
        sleep(Duration::from_secs(1));
        assert_eq!(r.rate_with(&key, PROPORTIONAL_RATE_ESTIMATE_CALC_FN), 0f64);
    }
}


================================================
FILE: pingora-load-balancing/Cargo.toml
================================================
[package]
name = "pingora-load-balancing"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["network-programming"]
keywords = ["proxy", "pingora"]
description = """
Common load balancing features for Pingora proxy.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_load_balancing"
path = "src/lib.rs"

[dependencies]
async-trait = { workspace = true }
pingora-http = { version = "0.8.0", path = "../pingora-http" }
pingora-error = { version = "0.8.0", path = "../pingora-error" }
pingora-core = { version = "0.8.0", path = "../pingora-core", default-features = false }
pingora-ketama = { version = "0.8.0", path = "../pingora-ketama" }
pingora-runtime = { version = "0.8.0", path = "../pingora-runtime" }
arc-swap = "1"
fnv = "1"
rand = "0.8"
tokio = { workspace = true }
futures = "0"
log = { workspace = true }
http = { workspace = true }
derivative.workspace = true

[dev-dependencies]

[features]
default = []
openssl = ["pingora-core/openssl", "openssl_derived"]
boringssl = ["pingora-core/boringssl", "openssl_derived"]
rustls = ["pingora-core/rustls", "any_tls"]
s2n = ["pingora-core/s2n", "any_tls"]
openssl_derived = ["any_tls"]
any_tls = []
v2 = ["pingora-ketama/v2"]


================================================
FILE: pingora-load-balancing/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-load-balancing/src/background.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Implement [BackgroundService] for [LoadBalancer]

use std::time::{Duration, Instant};

use super::{BackendIter, BackendSelection, LoadBalancer};
use async_trait::async_trait;
use pingora_core::services::{background::BackgroundService, ServiceReadyNotifier};

impl<S: Send + Sync + BackendSelection + 'static> LoadBalancer<S>
where
    S::Iter: BackendIter,
{
    pub async fn run(
        &self,
        shutdown: pingora_core::server::ShutdownWatch,
        mut ready_opt: Option<ServiceReadyNotifier>,
    ) -> () {
        // 136 years
        const NEVER: Duration = Duration::from_secs(u32::MAX as u64);
        let mut now = Instant::now();
        // run update and health check once
        let mut next_update = now;
        let mut next_health_check = now;

        loop {
            if *shutdown.borrow() {
                return;
            }

            if next_update <= now {
                // TODO: log err
                let _ = self.update().await;
                next_update = now + self.update_frequency.unwrap_or(NEVER);
            }

            // After the first update, discovery and selection setup will be
            // done, so we will notify dependents
            if let Some(ready) = ready_opt.take() {
                ServiceReadyNotifier::notify_ready(ready)
            }

            if next_health_check <= now {
                self.backends
                    .run_health_check(self.parallel_health_check)
                    .await;
                next_health_check = now + self.health_check_frequency.unwrap_or(NEVER);
            }

            if self.update_frequency.is_none() && self.health_check_frequency.is_none() {
                return;
            }
            let to_wake = std::cmp::min(next_update, next_health_check);
            tokio::time::sleep_until(to_wake.into()).await;
            now = Instant::now();
        }
    }
}

/// Implement [BackgroundService] for [LoadBalancer]. For backward-compatibility
/// reasons, we implement both the `start` and `start_with_ready_notifier`
/// methods.
#[async_trait]
impl<S: Send + Sync + BackendSelection + 'static> BackgroundService for LoadBalancer<S>
where
    S::Iter: BackendIter,
{
    async fn start_with_ready_notifier(
        &self,
        shutdown: pingora_core::server::ShutdownWatch,
        ready: ServiceReadyNotifier,
    ) -> () {
        self.run(shutdown, Some(ready)).await
    }

    async fn start(&self, shutdown: pingora_core::server::ShutdownWatch) -> () {
        self.run(shutdown, None).await
    }
}


================================================
FILE: pingora-load-balancing/src/discovery.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Service discovery interface and implementations

use arc_swap::ArcSwap;
use async_trait::async_trait;
use http::Extensions;
use pingora_core::protocols::l4::socket::SocketAddr;
use pingora_error::Result;
use std::io::Result as IoResult;
use std::net::ToSocketAddrs;
use std::{
    collections::{BTreeSet, HashMap},
    sync::Arc,
};

use crate::Backend;

/// [ServiceDiscovery] is the interface to discover [Backend]s.
#[async_trait]
pub trait ServiceDiscovery {
    /// Return the discovered collection of backends.
    /// And *optionally* whether these backends are enabled to serve or not in a `HashMap`. Any backend
    /// that is not explicitly in the set is considered enabled.
    async fn discover(&self) -> Result<(BTreeSet<Backend>, HashMap<u64, bool>)>;
}

// TODO: add DNS base discovery

/// A static collection of [Backend]s for service discovery.
#[derive(Default)]
pub struct Static {
    backends: ArcSwap<BTreeSet<Backend>>,
}

impl Static {
    /// Create a new boxed [Static] service discovery with the given backends.
    pub fn new(backends: BTreeSet<Backend>) -> Box<Self> {
        Box::new(Static {
            backends: ArcSwap::new(Arc::new(backends)),
        })
    }

    /// Create a new boxed [Static] from a given iterator of items that implements [ToSocketAddrs].
    pub fn try_from_iter<A, T: IntoIterator<Item = A>>(iter: T) -> IoResult<Box<Self>>
    where
        A: ToSocketAddrs,
    {
        let mut upstreams = BTreeSet::new();
        for addrs in iter.into_iter() {
            let addrs = addrs.to_socket_addrs()?.map(|addr| Backend {
                addr: SocketAddr::Inet(addr),
                weight: 1,
                ext: Extensions::new(),
            });
            upstreams.extend(addrs);
        }
        Ok(Self::new(upstreams))
    }

    /// return the collection to backends
    pub fn get(&self) -> BTreeSet<Backend> {
        BTreeSet::clone(&self.backends.load())
    }

    // Concurrent set/add/remove might race with each other
    // TODO: use a queue to avoid racing

    // TODO: take an impl iter
    #[allow(dead_code)]
    pub(crate) fn set(&self, backends: BTreeSet<Backend>) {
        self.backends.store(backends.into())
    }

    #[allow(dead_code)]
    pub(crate) fn add(&self, backend: Backend) {
        let mut new = self.get();
        new.insert(backend);
        self.set(new)
    }

    #[allow(dead_code)]
    pub(crate) fn remove(&self, backend: &Backend) {
        let mut new = self.get();
        new.remove(backend);
        self.set(new)
    }
}

#[async_trait]
impl ServiceDiscovery for Static {
    async fn discover(&self) -> Result<(BTreeSet<Backend>, HashMap<u64, bool>)> {
        // no readiness
        let health = HashMap::new();
        Ok((self.get(), health))
    }
}


================================================
FILE: pingora-load-balancing/src/health_check.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Health Check interface and methods.

use crate::Backend;
use arc_swap::ArcSwap;
use async_trait::async_trait;
use pingora_core::connectors::http::custom;
use pingora_core::connectors::{http::Connector as HttpConnector, TransportConnector};
use pingora_core::custom_session;
use pingora_core::protocols::http::custom::client::Session;
use pingora_core::upstreams::peer::{BasicPeer, HttpPeer, Peer};
use pingora_error::{Error, ErrorType::CustomCode, Result};
use pingora_http::{RequestHeader, ResponseHeader};
use std::sync::Arc;
use std::time::Duration;

/// [HealthObserve] is an interface for observing health changes of backends,
/// this is what's used for our health observation callback.
#[async_trait]
pub trait HealthObserve {
    /// Observes the health of a [Backend], can be used for monitoring purposes.
    async fn observe(&self, target: &Backend, healthy: bool);
}
/// Provided to a [HealthCheck] to observe changes to [Backend] health.
pub type HealthObserveCallback = Box<dyn HealthObserve + Send + Sync>;

/// Provided to a [HealthCheck] to fetch [Backend] summary for detailed logging.
pub type BackendSummary = Box<dyn Fn(&Backend) -> String + Send + Sync>;

/// [HealthCheck] is the interface to implement health check for backends
#[async_trait]
pub trait HealthCheck {
    /// Check the given backend.
    ///
    /// `Ok(())`` if the check passes, otherwise the check fails.
    async fn check(&self, target: &Backend) -> Result<()>;

    /// Called when the health changes for a [Backend].
    async fn health_status_change(&self, _target: &Backend, _healthy: bool) {}

    /// Called when a detailed [Backend] summary is needed.
    fn backend_summary(&self, target: &Backend) -> String {
        format!("{target:?}")
    }

    /// This function defines how many *consecutive* checks should flip the health of a backend.
    ///
    /// For example: with `success``: `true`: this function should return the
    /// number of check need to flip from unhealthy to healthy.
    fn health_threshold(&self, success: bool) -> usize;
}

/// TCP health check
///
/// This health check checks if a TCP (or TLS) connection can be established to a given backend.
pub struct TcpHealthCheck {
    /// Number of successful checks to flip from unhealthy to healthy.
    pub consecutive_success: usize,
    /// Number of failed checks to flip from healthy to unhealthy.
    pub consecutive_failure: usize,
    /// How to connect to the backend.
    ///
    /// This field defines settings like the connect timeout and src IP to bind.
    /// The SocketAddr of `peer_template` is just a placeholder which will be replaced by the
    /// actual address of the backend when the health check runs.
    ///
    /// By default, this check will try to establish a TCP connection. When the `sni` field is
    /// set, it will also try to establish a TLS connection on top of the TCP connection.
    pub peer_template: BasicPeer,
    connector: TransportConnector,
    /// A callback that is invoked when the `healthy` status changes for a [Backend].
    pub health_changed_callback: Option<HealthObserveCallback>,
}

impl Default for TcpHealthCheck {
    fn default() -> Self {
        let mut peer_template = BasicPeer::new("0.0.0.0:1");
        peer_template.options.connection_timeout = Some(Duration::from_secs(1));
        TcpHealthCheck {
            consecutive_success: 1,
            consecutive_failure: 1,
            peer_template,
            connector: TransportConnector::new(None),
            health_changed_callback: None,
        }
    }
}

impl TcpHealthCheck {
    /// Create a new [TcpHealthCheck] with the following default values
    /// * connect timeout: 1 second
    /// * consecutive_success: 1
    /// * consecutive_failure: 1
    pub fn new() -> Box<Self> {
        Box::<TcpHealthCheck>::default()
    }

    /// Create a new [TcpHealthCheck] that tries to establish a TLS connection.
    ///
    /// The default values are the same as [Self::new()].
    pub fn new_tls(sni: &str) -> Box<Self> {
        let mut new = Self::default();
        new.peer_template.sni = sni.into();
        Box::new(new)
    }

    /// Replace the internal tcp connector with the given [TransportConnector]
    pub fn set_connector(&mut self, connector: TransportConnector) {
        self.connector = connector;
    }
}

#[async_trait]
impl HealthCheck for TcpHealthCheck {
    fn health_threshold(&self, success: bool) -> usize {
        if success {
            self.consecutive_success
        } else {
            self.consecutive_failure
        }
    }

    async fn check(&self, target: &Backend) -> Result<()> {
        let mut peer = self.peer_template.clone();
        peer._address = target.addr.clone();
        self.connector.get_stream(&peer).await.map(|_| {})
    }

    async fn health_status_change(&self, target: &Backend, healthy: bool) {
        if let Some(callback) = &self.health_changed_callback {
            callback.observe(target, healthy).await;
        }
    }
}

type Validator = Box<dyn Fn(&ResponseHeader) -> Result<()> + Send + Sync>;

/// HTTP health check
///
/// This health check checks if it can receive the expected HTTP(s) response from the given backend.
pub struct HttpHealthCheck<C = ()>
where
    C: custom::Connector,
{
    /// Number of successful checks to flip from unhealthy to healthy.
    pub consecutive_success: usize,
    /// Number of failed checks to flip from healthy to unhealthy.
    pub consecutive_failure: usize,
    /// How to connect to the backend.
    ///
    /// This field defines settings like the connect timeout and src IP to bind.
    /// The SocketAddr of `peer_template` is just a placeholder which will be replaced by the
    /// actual address of the backend when the health check runs.
    ///
    /// Set the `scheme` field to use HTTPs.
    pub peer_template: HttpPeer,
    /// Whether the underlying TCP/TLS connection can be reused across checks.
    ///
    /// * `false` will make sure that every health check goes through TCP (and TLS) handshakes.
    ///   Established connections sometimes hide the issue of firewalls and L4 LB.
    /// * `true` will try to reuse connections across checks, this is the more efficient and fast way
    ///   to perform health checks.
    pub reuse_connection: bool,
    /// The request header to send to the backend
    pub req: RequestHeader,
    connector: HttpConnector<C>,
    /// Optional field to define how to validate the response from the server.
    ///
    /// If not set, any response with a `200 OK` is considered a successful check.
    pub validator: Option<Validator>,
    /// Sometimes the health check endpoint lives one a different port than the actual backend.
    /// Setting this option allows the health check to perform on the given port of the backend IP.
    pub port_override: Option<u16>,
    /// A callback that is invoked when the `healthy` status changes for a [Backend].
    pub health_changed_callback: Option<HealthObserveCallback>,
    /// An optional callback for backend summary reporting.
    pub backend_summary_callback: Option<BackendSummary>,
}

impl HttpHealthCheck<()> {
    /// Create a new [HttpHealthCheck] with the following default settings
    /// * connect timeout: 1 second
    /// * read timeout: 1 second
    /// * req: a GET to the `/` of the given host name
    /// * consecutive_success: 1
    /// * consecutive_failure: 1
    /// * reuse_connection: false
    /// * validator: `None`, any 200 response is considered successful
    pub fn new(host: &str, tls: bool) -> Self {
        let mut req = RequestHeader::build("GET", b"/", None).unwrap();
        req.append_header("Host", host).unwrap();
        let sni = if tls { host.into() } else { String::new() };
        let mut peer_template = HttpPeer::new("0.0.0.0:1", tls, sni);
        peer_template.options.connection_timeout = Some(Duration::from_secs(1));
        peer_template.options.read_timeout = Some(Duration::from_secs(1));
        HttpHealthCheck {
            consecutive_success: 1,
            consecutive_failure: 1,
            peer_template,
            connector: HttpConnector::new(None),
            reuse_connection: false,
            req,
            validator: None,
            port_override: None,
            health_changed_callback: None,
            backend_summary_callback: None,
        }
    }
}

impl<C> HttpHealthCheck<C>
where
    C: custom::Connector,
{
    /// Create a new [HttpHealthCheck] with the following default settings
    /// * connect timeout: 1 second
    /// * read timeout: 1 second
    /// * req: a GET to the `/` of the given host name
    /// * consecutive_success: 1
    /// * consecutive_failure: 1
    /// * reuse_connection: false
    /// * validator: `None`, any 200 response is considered successful
    pub fn new_custom(host: &str, tls: bool, custom: HttpConnector<C>) -> Self {
        let mut req = RequestHeader::build("GET", b"/", None).unwrap();
        req.append_header("Host", host).unwrap();
        let sni = if tls { host.into() } else { String::new() };
        let mut peer_template = HttpPeer::new("0.0.0.0:1", tls, sni);
        peer_template.options.connection_timeout = Some(Duration::from_secs(1));
        peer_template.options.read_timeout = Some(Duration::from_secs(1));
        HttpHealthCheck {
            consecutive_success: 1,
            consecutive_failure: 1,
            peer_template,
            connector: custom,
            reuse_connection: false,
            req,
            validator: None,
            port_override: None,
            health_changed_callback: None,
            backend_summary_callback: None,
        }
    }

    /// Replace the internal http connector with the given [HttpConnector]
    pub fn set_connector(&mut self, connector: HttpConnector<C>) {
        self.connector = connector;
    }

    pub fn set_backend_summary<F>(&mut self, callback: F)
    where
        F: Fn(&Backend) -> String + Send + Sync + 'static,
    {
        self.backend_summary_callback = Some(Box::new(callback));
    }
}

#[async_trait]
impl<C> HealthCheck for HttpHealthCheck<C>
where
    C: custom::Connector,
{
    fn health_threshold(&self, success: bool) -> usize {
        if success {
            self.consecutive_success
        } else {
            self.consecutive_failure
        }
    }

    async fn check(&self, target: &Backend) -> Result<()> {
        let mut peer = self.peer_template.clone();
        peer._address = target.addr.clone();
        if let Some(port) = self.port_override {
            peer._address.set_port(port);
        }
        let session = self.connector.get_http_session(&peer).await?;

        let mut session = session.0;
        let req = Box::new(self.req.clone());
        session.write_request_header(req).await?;
        session.finish_request_body().await?;

        custom_session!(session.finish_custom().await?);

        if let Some(read_timeout) = peer.options.read_timeout {
            session.set_read_timeout(Some(read_timeout));
        }

        session.read_response_header().await?;

        let resp = session.response_header().expect("just read");

        if let Some(validator) = self.validator.as_ref() {
            validator(resp)?;
        } else if resp.status != 200 {
            return Error::e_explain(
                CustomCode("non 200 code", resp.status.as_u16()),
                "during http healthcheck",
            );
        };

        while session.read_response_body().await?.is_some() {
            // drain the body if any
        }

        // TODO(slava): do it concurrently wtih body drain?
        custom_session!(session.drain_custom_messages().await?);

        if self.reuse_connection {
            let idle_timeout = peer.idle_timeout();
            self.connector
                .release_http_session(session, &peer, idle_timeout)
                .await;
        }

        Ok(())
    }
    async fn health_status_change(&self, target: &Backend, healthy: bool) {
        if let Some(callback) = &self.health_changed_callback {
            callback.observe(target, healthy).await;
        }
    }
    fn backend_summary(&self, target: &Backend) -> String {
        if let Some(callback) = &self.backend_summary_callback {
            callback(target)
        } else {
            format!("{target:?}")
        }
    }
}

#[derive(Clone)]
struct HealthInner {
    /// Whether the endpoint is healthy to serve traffic
    healthy: bool,
    /// Whether the endpoint is allowed to serve traffic independent of its health
    enabled: bool,
    /// The counter for stateful transition between healthy and unhealthy.
    /// When [healthy] is true, this counts the number of consecutive health check failures
    /// so that the caller can flip the healthy when a certain threshold is met, and vise versa.
    consecutive_counter: usize,
}

/// Health of backends that can be updated atomically
pub(crate) struct Health(ArcSwap<HealthInner>);

impl Default for Health {
    fn default() -> Self {
        Health(ArcSwap::new(Arc::new(HealthInner {
            healthy: true, // TODO: allow to start with unhealthy
            enabled: true,
            consecutive_counter: 0,
        })))
    }
}

impl Clone for Health {
    fn clone(&self) -> Self {
        let inner = self.0.load_full();
        Health(ArcSwap::new(inner))
    }
}

impl Health {
    pub fn ready(&self) -> bool {
        let h = self.0.load();
        h.healthy && h.enabled
    }

    pub fn enable(&self, enabled: bool) {
        let h = self.0.load();
        if h.enabled != enabled {
            // clone the inner
            let mut new_health = (**h).clone();
            new_health.enabled = enabled;
            self.0.store(Arc::new(new_health));
        };
    }

    // return true when the health is flipped
    pub fn observe_health(&self, health: bool, flip_threshold: usize) -> bool {
        let h = self.0.load();
        let mut flipped = false;
        if h.healthy != health {
            // opposite health observed, ready to increase the counter
            // clone the inner
            let mut new_health = (**h).clone();
            new_health.consecutive_counter += 1;
            if new_health.consecutive_counter >= flip_threshold {
                new_health.healthy = health;
                new_health.consecutive_counter = 0;
                flipped = true;
            }
            self.0.store(Arc::new(new_health));
        } else if h.consecutive_counter > 0 {
            // observing the same health as the current state.
            // reset the counter, if it is non-zero, because it is no longer consecutive
            let mut new_health = (**h).clone();
            new_health.consecutive_counter = 0;
            self.0.store(Arc::new(new_health));
        }
        flipped
    }
}

#[cfg(test)]
mod test {
    use std::{
        collections::{BTreeSet, HashMap},
        sync::atomic::{AtomicU16, Ordering},
    };

    use super::*;
    use crate::{discovery, Backends, SocketAddr};
    use async_trait::async_trait;
    use http::Extensions;

    #[tokio::test]
    async fn test_tcp_check() {
        let tcp_check = TcpHealthCheck::default();

        let backend = Backend {
            addr: SocketAddr::Inet("1.1.1.1:80".parse().unwrap()),
            weight: 1,
            ext: Extensions::new(),
        };

        assert!(tcp_check.check(&backend).await.is_ok());

        let backend = Backend {
            addr: SocketAddr::Inet("1.1.1.1:79".parse().unwrap()),
            weight: 1,
            ext: Extensions::new(),
        };

        assert!(tcp_check.check(&backend).await.is_err());
    }

    #[cfg(feature = "any_tls")]
    #[tokio::test]
    async fn test_tls_check() {
        let tls_check = TcpHealthCheck::new_tls("one.one.one.one");
        let backend = Backend {
            addr: SocketAddr::Inet("1.1.1.1:443".parse().unwrap()),
            weight: 1,
            ext: Extensions::new(),
        };

        assert!(tls_check.check(&backend).await.is_ok());
    }

    #[cfg(feature = "any_tls")]
    #[tokio::test]
    async fn test_https_check() {
        let https_check = HttpHealthCheck::new("one.one.one.one", true);

        let backend = Backend {
            addr: SocketAddr::Inet("1.1.1.1:443".parse().unwrap()),
            weight: 1,
            ext: Extensions::new(),
        };

        assert!(https_check.check(&backend).await.is_ok());
    }

    #[tokio::test]
    async fn test_http_custom_check() {
        let mut http_check = HttpHealthCheck::new("one.one.one.one", false);
        http_check.validator = Some(Box::new(|resp: &ResponseHeader| {
            if resp.status == 301 {
                Ok(())
            } else {
                Error::e_explain(
                    CustomCode("non 301 code", resp.status.as_u16()),
                    "during http healthcheck",
                )
            }
        }));

        let backend = Backend {
            addr: SocketAddr::Inet("1.1.1.1:80".parse().unwrap()),
            weight: 1,
            ext: Extensions::new(),
        };

        http_check.check(&backend).await.unwrap();

        assert!(http_check.check(&backend).await.is_ok());
    }

    #[tokio::test]
    async fn test_health_observe() {
        struct Observe {
            unhealthy_count: Arc<AtomicU16>,
        }
        #[async_trait]
        impl HealthObserve for Observe {
            async fn observe(&self, _target: &Backend, healthy: bool) {
                if !healthy {
                    self.unhealthy_count.fetch_add(1, Ordering::Relaxed);
                }
            }
        }

        let good_backend = Backend::new("127.0.0.1:79").unwrap();
        let new_good_backends = || -> (BTreeSet<Backend>, HashMap<u64, bool>) {
            let mut healthy = HashMap::new();
            healthy.insert(good_backend.hash_key(), true);
            let mut backends = BTreeSet::new();
            backends.extend(vec![good_backend.clone()]);
            (backends, healthy)
        };
        // tcp health check
        {
            let unhealthy_count = Arc::new(AtomicU16::new(0));
            let ob = Observe {
                unhealthy_count: unhealthy_count.clone(),
            };
            let bob = Box::new(ob);
            let tcp_check = TcpHealthCheck {
                health_changed_callback: Some(bob),
                ..Default::default()
            };

            let discovery = discovery::Static::default();
            let mut backends = Backends::new(Box::new(discovery));
            backends.set_health_check(Box::new(tcp_check));
            let result = new_good_backends();
            backends.do_update(result.0, result.1, |_backend: Arc<BTreeSet<Backend>>| {});
            // the backend is ready
            assert!(backends.ready(&good_backend));

            // run health check
            backends.run_health_check(false).await;
            assert!(1 == unhealthy_count.load(Ordering::Relaxed));
            // backend is unhealthy
            assert!(!backends.ready(&good_backend));
        }

        // http health check
        {
            let unhealthy_count = Arc::new(AtomicU16::new(0));
            let ob = Observe {
                unhealthy_count: unhealthy_count.clone(),
            };
            let bob = Box::new(ob);

            let mut https_check = HttpHealthCheck::new("one.one.one.one", true);
            https_check.health_changed_callback = Some(bob);

            let discovery = discovery::Static::default();
            let mut backends = Backends::new(Box::new(discovery));
            backends.set_health_check(Box::new(https_check));
            let result = new_good_backends();
            backends.do_update(result.0, result.1, |_backend: Arc<BTreeSet<Backend>>| {});
            // the backend is ready
            assert!(backends.ready(&good_backend));
            // run health check
            backends.run_health_check(false).await;
            assert!(1 == unhealthy_count.load(Ordering::Relaxed));
            assert!(!backends.ready(&good_backend));
        }
    }
}


================================================
FILE: pingora-load-balancing/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # Pingora Load Balancing utilities
//! This crate provides common service discovery, health check and load balancing
//! algorithms for proxies to use.

// https://github.com/mcarton/rust-derivative/issues/112
// False positive for macro generated code
#![allow(clippy::non_canonical_partial_ord_impl)]

use arc_swap::ArcSwap;
use derivative::Derivative;
use futures::FutureExt;
pub use http::Extensions;
use pingora_core::protocols::l4::socket::SocketAddr;
use pingora_error::{ErrorType, OrErr, Result};
use std::collections::hash_map::DefaultHasher;
use std::collections::{BTreeSet, HashMap};
use std::hash::{Hash, Hasher};
use std::io::Result as IoResult;
use std::net::ToSocketAddrs;
use std::sync::Arc;
use std::time::Duration;

mod background;
pub mod discovery;
pub mod health_check;
pub mod selection;

use discovery::ServiceDiscovery;
use health_check::Health;
use selection::UniqueIterator;
use selection::{BackendIter, BackendSelection};

pub mod prelude {
    pub use crate::health_check::TcpHealthCheck;
    pub use crate::selection::RoundRobin;
    pub use crate::LoadBalancer;
}

/// [Backend] represents a server to proxy or connect to.
#[derive(Derivative)]
#[derivative(Clone, Hash, PartialEq, PartialOrd, Eq, Ord, Debug)]
pub struct Backend {
    /// The address to the backend server.
    pub addr: SocketAddr,
    /// The relative weight of the server. Load balancing algorithms will
    /// proportionally distributed traffic according to this value.
    pub weight: usize,

    /// The extension field to put arbitrary data to annotate the Backend.
    /// The data added here is opaque to this crate hence the data is ignored by
    /// functionalities of this crate. For example, two backends with the same
    /// [SocketAddr] and the same weight but different `ext` data are considered
    /// identical.
    /// See [Extensions] for how to add and read the data.
    #[derivative(PartialEq = "ignore")]
    #[derivative(PartialOrd = "ignore")]
    #[derivative(Hash = "ignore")]
    #[derivative(Ord = "ignore")]
    pub ext: Extensions,
}

impl Backend {
    /// Create a new [Backend] with `weight` 1. The function will try to parse
    ///  `addr` into a [std::net::SocketAddr].
    pub fn new(addr: &str) -> Result<Self> {
        Self::new_with_weight(addr, 1)
    }

    /// Creates a new [Backend] with the specified `weight`. The function will try to parse
    /// `addr` into a [std::net::SocketAddr].
    pub fn new_with_weight(addr: &str, weight: usize) -> Result<Self> {
        let addr = addr
            .parse()
            .or_err(ErrorType::InternalError, "invalid socket addr")?;
        Ok(Backend {
            addr: SocketAddr::Inet(addr),
            weight,
            ext: Extensions::new(),
        })
        // TODO: UDS
    }

    pub(crate) fn hash_key(&self) -> u64 {
        let mut hasher = DefaultHasher::new();
        self.hash(&mut hasher);
        hasher.finish()
    }
}

impl std::ops::Deref for Backend {
    type Target = SocketAddr;

    fn deref(&self) -> &Self::Target {
        &self.addr
    }
}

impl std::ops::DerefMut for Backend {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.addr
    }
}

impl std::net::ToSocketAddrs for Backend {
    type Iter = std::iter::Once<std::net::SocketAddr>;

    fn to_socket_addrs(&self) -> std::io::Result<Self::Iter> {
        self.addr.to_socket_addrs()
    }
}

/// [Backends] is a collection of [Backend]s.
///
/// It includes a service discovery method (static or dynamic) to discover all
/// the available backends as well as an optional health check method to probe the liveness
/// of each backend.
pub struct Backends {
    discovery: Box<dyn ServiceDiscovery + Send + Sync + 'static>,
    health_check: Option<Arc<dyn health_check::HealthCheck + Send + Sync + 'static>>,
    backends: ArcSwap<BTreeSet<Backend>>,
    health: ArcSwap<HashMap<u64, Health>>,
}

impl Backends {
    /// Create a new [Backends] with the given [ServiceDiscovery] implementation.
    ///
    /// The health check method is by default empty.
    pub fn new(discovery: Box<dyn ServiceDiscovery + Send + Sync + 'static>) -> Self {
        Self {
            discovery,
            health_check: None,
            backends: Default::default(),
            health: Default::default(),
        }
    }

    /// Set the health check method. See [health_check] for the methods provided.
    pub fn set_health_check(
        &mut self,
        hc: Box<dyn health_check::HealthCheck + Send + Sync + 'static>,
    ) {
        self.health_check = Some(hc.into())
    }

    /// Updates backends when the new is different from the current set,
    /// the callback will be invoked when the new set of backend is different
    /// from the current one so that the caller can update the selector accordingly.
    fn do_update<F>(
        &self,
        new_backends: BTreeSet<Backend>,
        enablement: HashMap<u64, bool>,
        callback: F,
    ) where
        F: Fn(Arc<BTreeSet<Backend>>),
    {
        if (**self.backends.load()) != new_backends {
            let old_health = self.health.load();
            let mut health = HashMap::with_capacity(new_backends.len());
            for backend in new_backends.iter() {
                let hash_key = backend.hash_key();
                // use the default health if the backend is new
                let backend_health = old_health.get(&hash_key).cloned().unwrap_or_default();

                // override enablement
                if let Some(backend_enabled) = enablement.get(&hash_key) {
                    backend_health.enable(*backend_enabled);
                }
                health.insert(hash_key, backend_health);
            }

            // TODO: put this all under 1 ArcSwap so the update is atomic
            // It's important the `callback()` executes first since computing selector backends might
            // be expensive. For example, if a caller checks `backends` to see if any are available
            // they may encounter false positives if the selector isn't ready yet.
            let new_backends = Arc::new(new_backends);
            callback(new_backends.clone());
            self.backends.store(new_backends);
            self.health.store(Arc::new(health));
        } else {
            // no backend change, just check enablement
            for (hash_key, backend_enabled) in enablement.iter() {
                // override enablement if set
                // this get should always be Some(_) because we already populate `health`` for all known backends
                if let Some(backend_health) = self.health.load().get(hash_key) {
                    backend_health.enable(*backend_enabled);
                }
            }
        }
    }

    /// Whether a certain [Backend] is ready to serve traffic.
    ///
    /// This function returns true when the backend is both healthy and enabled.
    /// This function returns true when the health check is unset but the backend is enabled.
    /// When the health check is set, this function will return false for the `backend` it
    /// doesn't know.
    pub fn ready(&self, backend: &Backend) -> bool {
        self.health
            .load()
            .get(&backend.hash_key())
            // Racing: return `None` when this function is called between the
            // backend store and the health store
            .map_or(self.health_check.is_none(), |h| h.ready())
    }

    /// Manually set if a [Backend] is ready to serve traffic.
    ///
    /// This method does not override the health of the backend. It is meant to be used
    /// to stop a backend from accepting traffic when it is still healthy.
    ///
    /// This method is noop when the given backend doesn't exist in the service discovery.
    pub fn set_enable(&self, backend: &Backend, enabled: bool) {
        // this should always be Some(_) because health is always populated during update
        if let Some(h) = self.health.load().get(&backend.hash_key()) {
            h.enable(enabled)
        };
    }

    /// Return the collection of the backends.
    pub fn get_backend(&self) -> Arc<BTreeSet<Backend>> {
        self.backends.load_full()
    }

    /// Call the service discovery method to update the collection of backends.
    ///
    /// The callback will be invoked when the new set of backend is different
    /// from the current one so that the caller can update the selector accordingly.
    pub async fn update<F>(&self, callback: F) -> Result<()>
    where
        F: Fn(Arc<BTreeSet<Backend>>),
    {
        let (new_backends, enablement) = self.discovery.discover().await?;
        self.do_update(new_backends, enablement, callback);
        Ok(())
    }

    /// Run health check on all backends if it is set.
    ///
    /// When `parallel: true`, all backends are checked in parallel instead of sequentially
    pub async fn run_health_check(&self, parallel: bool) {
        use crate::health_check::HealthCheck;
        use log::{info, warn};
        use pingora_runtime::current_handle;

        async fn check_and_report(
            backend: &Backend,
            check: &Arc<dyn HealthCheck + Send + Sync>,
            health_table: &HashMap<u64, Health>,
        ) {
            let errored = check.check(backend).await.err();
            if let Some(h) = health_table.get(&backend.hash_key()) {
                let flipped =
                    h.observe_health(errored.is_none(), check.health_threshold(errored.is_none()));
                if flipped {
                    check.health_status_change(backend, errored.is_none()).await;
                    let summary = check.backend_summary(backend);
                    if let Some(e) = errored {
                        warn!("{summary} becomes unhealthy, {e}");
                    } else {
                        info!("{summary} becomes healthy");
                    }
                }
            }
        }

        let Some(health_check) = self.health_check.as_ref() else {
            return;
        };

        let backends = self.backends.load();
        if parallel {
            let health_table = self.health.load_full();
            let runtime = current_handle();
            let jobs = backends.iter().map(|backend| {
                let backend = backend.clone();
                let check = health_check.clone();
                let ht = health_table.clone();
                runtime.spawn(async move {
                    check_and_report(&backend, &check, &ht).await;
                })
            });

            futures::future::join_all(jobs).await;
        } else {
            for backend in backends.iter() {
                check_and_report(backend, health_check, &self.health.load()).await;
            }
        }
    }
}

/// A [LoadBalancer] instance contains the service discovery, health check and backend selection
/// all together.
///
/// In order to run service discovery and health check at the designated frequencies, the [LoadBalancer]
/// needs to be run as a [pingora_core::services::background::BackgroundService].
pub struct LoadBalancer<S>
where
    S: BackendSelection,
{
    backends: Backends,
    selector: ArcSwap<S>,

    config: Option<S::Config>,

    /// How frequent the health check logic (if set) should run.
    ///
    /// If `None`, the health check logic will only run once at the beginning.
    pub health_check_frequency: Option<Duration>,
    /// How frequent the service discovery should run.
    ///
    /// If `None`, the service discovery will only run once at the beginning.
    pub update_frequency: Option<Duration>,
    /// Whether to run health check to all backends in parallel. Default is false.
    pub parallel_health_check: bool,
}

impl<S> LoadBalancer<S>
where
    S: BackendSelection + 'static,
    S::Iter: BackendIter,
{
    /// Build a [LoadBalancer] with static backends created from the iter.
    ///
    /// Note: [ToSocketAddrs] will invoke blocking network IO for DNS lookup if
    /// the input cannot be directly parsed as [SocketAddr].
    pub fn try_from_iter<A, T: IntoIterator<Item = A>>(iter: T) -> IoResult<Self>
    where
        A: ToSocketAddrs,
    {
        let discovery = discovery::Static::try_from_iter(iter)?;
        let backends = Backends::new(discovery);
        let lb = Self::from_backends(backends);
        lb.update()
            .now_or_never()
            .expect("static should not block")
            .expect("static should not error");
        Ok(lb)
    }

    /// Build a [LoadBalancer] with the given [Backends] and the config.
    pub fn from_backends_with_config(backends: Backends, config_opt: Option<S::Config>) -> Self {
        let selector_raw = if let Some(config) = config_opt.as_ref() {
            S::build_with_config(&backends.get_backend(), config)
        } else {
            S::build(&backends.get_backend())
        };

        let selector = ArcSwap::new(Arc::new(selector_raw));

        LoadBalancer {
            backends,
            selector,
            config: config_opt,
            health_check_frequency: None,
            update_frequency: None,
            parallel_health_check: false,
        }
    }

    /// Build a [LoadBalancer] with the given [Backends].
    pub fn from_backends(backends: Backends) -> Self {
        Self::from_backends_with_config(backends, None)
    }

    /// Run the service discovery and update the selection algorithm.
    ///
    /// This function will be called every `update_frequency` if this [LoadBalancer] instance
    /// is running as a background service.
    pub async fn update(&self) -> Result<()> {
        self.backends
            .update(|backends| {
                let selector = if let Some(config) = &self.config {
                    S::build_with_config(&backends, config)
                } else {
                    S::build(&backends)
                };

                self.selector.store(Arc::new(selector))
            })
            .await
    }

    /// Return the first healthy [Backend] according to the selection algorithm and the
    /// health check results.
    ///
    /// The `key` is used for hash based selection and is ignored if the selection is random or
    /// round robin.
    ///
    /// the `max_iterations` is there to bound the search time for the next Backend. In certain
    /// algorithm like Ketama hashing, the search for the next backend is linear and could take
    /// a lot steps.
    // TODO: consider remove `max_iterations` as users have no idea how to set it.
    pub fn select(&self, key: &[u8], max_iterations: usize) -> Option<Backend> {
        self.select_with(key, max_iterations, |_, health| health)
    }

    /// Similar to [Self::select], return the first healthy [Backend] according to the selection algorithm
    /// and the user defined `accept` function.
    ///
    /// The `accept` function takes two inputs, the backend being selected and the internal health of that
    /// backend. The function can do things like ignoring the internal health checks or skipping this backend
    /// because it failed before. The `accept` function is called multiple times iterating over backends
    /// until it returns `true`.
    pub fn select_with<F>(&self, key: &[u8], max_iterations: usize, accept: F) -> Option<Backend>
    where
        F: Fn(&Backend, bool) -> bool,
    {
        let selection = self.selector.load();
        let mut iter = UniqueIterator::new(selection.iter(key), max_iterations);
        while let Some(b) = iter.get_next() {
            if accept(&b, self.backends.ready(&b)) {
                return Some(b);
            }
        }
        None
    }

    /// Set the health check method. See [health_check].
    pub fn set_health_check(
        &mut self,
        hc: Box<dyn health_check::HealthCheck + Send + Sync + 'static>,
    ) {
        self.backends.set_health_check(hc);
    }

    /// Access the [Backends] of this [LoadBalancer]
    pub fn backends(&self) -> &Backends {
        &self.backends
    }
}

#[cfg(test)]
mod test {
    use std::sync::atomic::{AtomicBool, Ordering::Relaxed};

    use super::*;
    use async_trait::async_trait;

    #[tokio::test]
    async fn test_static_backends() {
        let backends: LoadBalancer<selection::RoundRobin> =
            LoadBalancer::try_from_iter(["1.1.1.1:80", "1.0.0.1:80"]).unwrap();

        let backend1 = Backend::new("1.1.1.1:80").unwrap();
        let backend2 = Backend::new("1.0.0.1:80").unwrap();
        let backend = backends.backends().get_backend();
        assert!(backend.contains(&backend1));
        assert!(backend.contains(&backend2));
    }

    #[tokio::test]
    async fn test_backends() {
        let discovery = discovery::Static::default();
        let good1 = Backend::new("1.1.1.1:80").unwrap();
        discovery.add(good1.clone());
        let good2 = Backend::new("1.0.0.1:80").unwrap();
        discovery.add(good2.clone());
        let bad = Backend::new("127.0.0.1:79").unwrap();
        discovery.add(bad.clone());

        let mut backends = Backends::new(Box::new(discovery));
        let check = health_check::TcpHealthCheck::new();
        backends.set_health_check(check);

        // true: new backend discovered
        let updated = AtomicBool::new(false);
        backends
            .update(|_| updated.store(true, Relaxed))
            .await
            .unwrap();
        assert!(updated.load(Relaxed));

        // false: no new backend discovered
        let updated = AtomicBool::new(false);
        backends
            .update(|_| updated.store(true, Relaxed))
            .await
            .unwrap();
        assert!(!updated.load(Relaxed));

        backends.run_health_check(false).await;

        let backend = backends.get_backend();
        assert!(backend.contains(&good1));
        assert!(backend.contains(&good2));
        assert!(backend.contains(&bad));

        assert!(backends.ready(&good1));
        assert!(backends.ready(&good2));
        assert!(!backends.ready(&bad));
    }
    #[tokio::test]
    async fn test_backends_with_ext() {
        let discovery = discovery::Static::default();
        let mut b1 = Backend::new("1.1.1.1:80").unwrap();
        b1.ext.insert(true);
        let mut b2 = Backend::new("1.0.0.1:80").unwrap();
        b2.ext.insert(1u8);
        discovery.add(b1.clone());
        discovery.add(b2.clone());

        let backends = Backends::new(Box::new(discovery));

        // fill in the backends
        backends.update(|_| {}).await.unwrap();

        let backend = backends.get_backend();
        assert!(backend.contains(&b1));
        assert!(backend.contains(&b2));

        let b2 = backend.first().unwrap();
        assert_eq!(b2.ext.get::<u8>(), Some(&1));

        let b1 = backend.last().unwrap();
        assert_eq!(b1.ext.get::<bool>(), Some(&true));
    }

    #[tokio::test]
    async fn test_discovery_readiness() {
        use discovery::Static;

        struct TestDiscovery(Static);
        #[async_trait]
        impl ServiceDiscovery for TestDiscovery {
            async fn discover(&self) -> Result<(BTreeSet<Backend>, HashMap<u64, bool>)> {
                let bad = Backend::new("127.0.0.1:79").unwrap();
                let (backends, mut readiness) = self.0.discover().await?;
                readiness.insert(bad.hash_key(), false);
                Ok((backends, readiness))
            }
        }
        let discovery = Static::default();
        let good1 = Backend::new("1.1.1.1:80").unwrap();
        discovery.add(good1.clone());
        let good2 = Backend::new("1.0.0.1:80").unwrap();
        discovery.add(good2.clone());
        let bad = Backend::new("127.0.0.1:79").unwrap();
        discovery.add(bad.clone());
        let discovery = TestDiscovery(discovery);

        let backends = Backends::new(Box::new(discovery));

        // true: new backend discovered
        let updated = AtomicBool::new(false);
        backends
            .update(|_| updated.store(true, Relaxed))
            .await
            .unwrap();
        assert!(updated.load(Relaxed));

        let backend = backends.get_backend();
        assert!(backend.contains(&good1));
        assert!(backend.contains(&good2));
        assert!(backend.contains(&bad));

        assert!(backends.ready(&good1));
        assert!(backends.ready(&good2));
        assert!(!backends.ready(&bad));
    }

    #[tokio::test]
    async fn test_parallel_health_check() {
        let discovery = discovery::Static::default();
        let good1 = Backend::new("1.1.1.1:80").unwrap();
        discovery.add(good1.clone());
        let good2 = Backend::new("1.0.0.1:80").unwrap();
        discovery.add(good2.clone());
        let bad = Backend::new("127.0.0.1:79").unwrap();
        discovery.add(bad.clone());

        let mut backends = Backends::new(Box::new(discovery));
        let check = health_check::TcpHealthCheck::new();
        backends.set_health_check(check);

        // true: new backend discovered
        let updated = AtomicBool::new(false);
        backends
            .update(|_| updated.store(true, Relaxed))
            .await
            .unwrap();
        assert!(updated.load(Relaxed));

        backends.run_health_check(true).await;

        assert!(backends.ready(&good1));
        assert!(backends.ready(&good2));
        assert!(!backends.ready(&bad));
    }

    mod thread_safety {
        use super::*;

        struct MockDiscovery {
            expected: usize,
        }
        #[async_trait]
        impl ServiceDiscovery for MockDiscovery {
            async fn discover(&self) -> Result<(BTreeSet<Backend>, HashMap<u64, bool>)> {
                let mut d = BTreeSet::new();
                let mut m = HashMap::with_capacity(self.expected);
                for i in 0..self.expected {
                    let b = Backend::new(&format!("1.1.1.1:{i}")).unwrap();
                    m.insert(i as u64, true);
                    d.insert(b);
                }
                Ok((d, m))
            }
        }

        #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
        async fn test_consistency() {
            let expected = 3000;
            let discovery = MockDiscovery { expected };
            let lb = Arc::new(LoadBalancer::<selection::Consistent>::from_backends(
                Backends::new(Box::new(discovery)),
            ));
            let lb2 = lb.clone();

            tokio::spawn(async move {
                assert!(lb2.update().await.is_ok());
            });
            let mut backend_count = 0;
            while backend_count == 0 {
                let backends = lb.backends();
                backend_count = backends.backends.load_full().len();
            }
            assert_eq!(backend_count, expected);
            assert!(lb.select_with(b"test", 1, |_, _| true).is_some());
        }
    }
}


================================================
FILE: pingora-load-balancing/src/selection/algorithms.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Implementation of algorithms for weighted selection
//!
//! All [std::hash::Hasher] + [Default] can be used directly as a selection algorithm.

use super::*;
use std::hash::Hasher;
use std::sync::atomic::{AtomicUsize, Ordering};

impl<H> SelectionAlgorithm for H
where
    H: Default + Hasher,
{
    fn new() -> Self {
        H::default()
    }
    fn next(&self, key: &[u8]) -> u64 {
        let mut hasher = H::default();
        hasher.write(key);
        hasher.finish()
    }
}

/// Round Robin selection
pub struct RoundRobin(AtomicUsize);

impl SelectionAlgorithm for RoundRobin {
    fn new() -> Self {
        Self(AtomicUsize::new(0))
    }
    fn next(&self, _key: &[u8]) -> u64 {
        self.0.fetch_add(1, Ordering::Relaxed) as u64
    }
}

/// Random selection
pub struct Random;

impl SelectionAlgorithm for Random {
    fn new() -> Self {
        Self
    }
    fn next(&self, _key: &[u8]) -> u64 {
        use rand::Rng;
        let mut rng = rand::thread_rng();
        rng.gen()
    }
}


================================================
FILE: pingora-load-balancing/src/selection/consistent.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Consistent Hashing

use super::*;
use pingora_core::protocols::l4::socket::SocketAddr;
use pingora_ketama::{Bucket, Continuum, Version};
use std::collections::HashMap;

/// Weighted Ketama consistent hashing
pub struct KetamaHashing {
    ring: Continuum,
    // TODO: update Ketama to just store this
    backends: HashMap<SocketAddr, Backend>,
}

#[derive(Clone, Debug, Copy, Default)]
pub struct KetamaConfig {
    pub point_multiple: Option<u32>,
}

impl BackendSelection for KetamaHashing {
    type Iter = OwnedNodeIterator;

    type Config = KetamaConfig;

    fn build_with_config(backends: &BTreeSet<Backend>, config: &Self::Config) -> Self {
        let KetamaConfig { point_multiple } = *config;

        let buckets: Vec<_> = backends
            .iter()
            .filter_map(|b| {
                // FIXME: ketama only supports Inet addr, UDS addrs are ignored here
                if let SocketAddr::Inet(addr) = b.addr {
                    Some(Bucket::new(addr, b.weight as u32))
                } else {
                    None
                }
            })
            .collect();
        let new_backends = backends
            .iter()
            .map(|b| (b.addr.clone(), b.clone()))
            .collect();

        #[allow(unused)]
        let version = if let Some(point_multiple) = point_multiple {
            match () {
                #[cfg(feature = "v2")]
                () => Version::V2 { point_multiple },
                #[cfg(not(feature = "v2"))]
                () => Version::V1,
            }
        } else {
            Version::V1
        };

        KetamaHashing {
            ring: Continuum::new_with_version(&buckets, version),
            backends: new_backends,
        }
    }

    fn build(backends: &BTreeSet<Backend>) -> Self {
        Self::build_with_config(backends, &KetamaConfig::default())
    }

    fn iter(self: &Arc<Self>, key: &[u8]) -> Self::Iter {
        OwnedNodeIterator {
            idx: self.ring.node_idx(key),
            ring: self.clone(),
        }
    }
}

/// Iterator over a Continuum
pub struct OwnedNodeIterator {
    idx: usize,
    ring: Arc<KetamaHashing>,
}

impl BackendIter for OwnedNodeIterator {
    fn next(&mut self) -> Option<&Backend> {
        self.ring.ring.get_addr(&mut self.idx).and_then(|addr| {
            let addr = SocketAddr::Inet(*addr);
            self.ring.backends.get(&addr)
        })
    }
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_ketama() {
        let b1 = Backend::new("1.1.1.1:80").unwrap();
        let b2 = Backend::new("1.0.0.1:80").unwrap();
        let b3 = Backend::new("1.0.0.255:80").unwrap();
        let backends = BTreeSet::from_iter([b1.clone(), b2.clone(), b3.clone()]);
        let hash = Arc::new(KetamaHashing::build(&backends));

        let mut iter = hash.iter(b"test0");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test2");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test3");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test4");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test5");
        assert_eq!(iter.next(), Some(&b3));
        let mut iter = hash.iter(b"test6");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test7");
        assert_eq!(iter.next(), Some(&b3));
        let mut iter = hash.iter(b"test8");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test9");
        assert_eq!(iter.next(), Some(&b2));

        // remove b3
        let backends = BTreeSet::from_iter([b1.clone(), b2.clone()]);
        let hash = Arc::new(KetamaHashing::build(&backends));
        let mut iter = hash.iter(b"test0");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test2");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test3");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test4");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test5");
        assert_eq!(iter.next(), Some(&b2)); // changed
        let mut iter = hash.iter(b"test6");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test7");
        assert_eq!(iter.next(), Some(&b1)); // changed
        let mut iter = hash.iter(b"test8");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test9");
        assert_eq!(iter.next(), Some(&b2));
    }
}


================================================
FILE: pingora-load-balancing/src/selection/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Backend selection interfaces and algorithms

pub mod algorithms;
pub mod consistent;
pub mod weighted;

use super::Backend;
use std::collections::{BTreeSet, HashSet};
use std::sync::Arc;
use weighted::Weighted;

/// [BackendSelection] is the interface to implement backend selection mechanisms.
pub trait BackendSelection: Sized {
    /// The [BackendIter] returned from iter() below.
    type Iter;

    /// The configuration type constructing [BackendSelection]
    type Config: Send + Sync;

    /// Create a [BackendSelection] from a set of backends and the given configuration. The
    /// default implementation ignores the configuration and simply calls [Self::build]
    fn build_with_config(backends: &BTreeSet<Backend>, _config: &Self::Config) -> Self {
        Self::build(backends)
    }

    /// The function to create a [BackendSelection] implementation.
    fn build(backends: &BTreeSet<Backend>) -> Self;
    /// Select backends for a given key.
    ///
    /// An [BackendIter] should be returned. The first item in the iter is the first
    /// choice backend. The user should continue to iterate over it if the first backend
    /// cannot be used due to its health or other reasons.
    fn iter(self: &Arc<Self>, key: &[u8]) -> Self::Iter
    where
        Self::Iter: BackendIter;
}

/// An iterator to find the suitable backend
///
/// Similar to [Iterator] but allow self referencing.
pub trait BackendIter {
    /// Return `Some(&Backend)` when there are more backends left to choose from.
    fn next(&mut self) -> Option<&Backend>;
}

/// [SelectionAlgorithm] is the interface to implement selection algorithms.
///
/// All [std::hash::Hasher] + [Default] can be used directly as a selection algorithm.
pub trait SelectionAlgorithm {
    /// Create a new implementation
    fn new() -> Self;
    /// Return the next index of backend. The caller should perform modulo to get
    /// the valid index of the backend.
    fn next(&self, key: &[u8]) -> u64;
}

/// [FNV](https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function) hashing
/// on weighted backends
pub type FNVHash = Weighted<fnv::FnvHasher>;

/// Alias of [`FNVHash`] for backwards compatibility until the next breaking change
#[doc(hidden)]
pub type FVNHash = Weighted<fnv::FnvHasher>;
/// Random selection on weighted backends
pub type Random = Weighted<algorithms::Random>;
/// Round robin selection on weighted backends
pub type RoundRobin = Weighted<algorithms::RoundRobin>;
/// Consistent Ketama hashing on weighted backends
pub type Consistent = consistent::KetamaHashing;

// TODO: least conn

/// An iterator which wraps another iterator and yields unique items. It optionally takes a max
/// number of iterations if the wrapped iterator never returns.
pub struct UniqueIterator<I>
where
    I: BackendIter,
{
    iter: I,
    seen: HashSet<u64>,
    max_iterations: usize,
    steps: usize,
}

impl<I> UniqueIterator<I>
where
    I: BackendIter,
{
    /// Wrap a new iterator and specify the maximum number of times we want to iterate.
    pub fn new(iter: I, max_iterations: usize) -> Self {
        Self {
            iter,
            max_iterations,
            seen: HashSet::new(),
            steps: 0,
        }
    }

    pub fn get_next(&mut self) -> Option<Backend> {
        while let Some(item) = self.iter.next() {
            if self.steps >= self.max_iterations {
                return None;
            }
            self.steps += 1;

            let hash_key = item.hash_key();
            if !self.seen.contains(&hash_key) {
                self.seen.insert(hash_key);
                return Some(item.clone());
            }
        }

        None
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    struct TestIter {
        seq: Vec<Backend>,
        idx: usize,
    }
    impl TestIter {
        fn new(input: &[&Backend]) -> Self {
            Self {
                seq: input.iter().cloned().cloned().collect(),
                idx: 0,
            }
        }
    }
    impl BackendIter for TestIter {
        fn next(&mut self) -> Option<&Backend> {
            let idx = self.idx;
            self.idx += 1;
            self.seq.get(idx)
        }
    }

    #[test]
    fn unique_iter_max_iterations_is_correct() {
        let b1 = Backend::new("1.1.1.1:80").unwrap();
        let b2 = Backend::new("1.0.0.1:80").unwrap();
        let b3 = Backend::new("1.0.0.255:80").unwrap();
        let items = [&b1, &b2, &b3];

        let mut all = UniqueIterator::new(TestIter::new(&items), 3);
        assert_eq!(all.get_next(), Some(b1.clone()));
        assert_eq!(all.get_next(), Some(b2.clone()));
        assert_eq!(all.get_next(), Some(b3.clone()));
        assert_eq!(all.get_next(), None);

        let mut stop = UniqueIterator::new(TestIter::new(&items), 1);
        assert_eq!(stop.get_next(), Some(b1));
        assert_eq!(stop.get_next(), None);
    }

    #[test]
    fn unique_iter_duplicate_items_are_filtered() {
        let b1 = Backend::new("1.1.1.1:80").unwrap();
        let b2 = Backend::new("1.0.0.1:80").unwrap();
        let b3 = Backend::new("1.0.0.255:80").unwrap();
        let items = [&b1, &b1, &b2, &b2, &b2, &b3];

        let mut uniq = UniqueIterator::new(TestIter::new(&items), 10);
        assert_eq!(uniq.get_next(), Some(b1));
        assert_eq!(uniq.get_next(), Some(b2));
        assert_eq!(uniq.get_next(), Some(b3));
    }
}


================================================
FILE: pingora-load-balancing/src/selection/weighted.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Weighted Selection

use super::{Backend, BackendIter, BackendSelection, SelectionAlgorithm};
use fnv::FnvHasher;
use std::collections::BTreeSet;
use std::sync::Arc;

/// Weighted selection with a given selection algorithm
///
/// The default algorithm is [FnvHasher]. See [super::algorithms] for more choices.
pub struct Weighted<H = FnvHasher> {
    backends: Box<[Backend]>,
    // each item is an index to the `backends`, use u16 to save memory, support up to 2^16 backends
    weighted: Box<[u16]>,
    algorithm: H,
}

impl<H: SelectionAlgorithm> BackendSelection for Weighted<H> {
    type Iter = WeightedIterator<H>;

    type Config = ();

    fn build(backends: &BTreeSet<Backend>) -> Self {
        assert!(
            backends.len() <= u16::MAX as usize,
            "support up to 2^16 backends"
        );
        let backends = Vec::from_iter(backends.iter().cloned()).into_boxed_slice();
        let mut weighted = Vec::with_capacity(backends.len());
        for (index, b) in backends.iter().enumerate() {
            for _ in 0..b.weight {
                weighted.push(index as u16);
            }
        }
        Weighted {
            backends,
            weighted: weighted.into_boxed_slice(),
            algorithm: H::new(),
        }
    }

    fn iter(self: &Arc<Self>, key: &[u8]) -> Self::Iter {
        WeightedIterator::new(key, self.clone())
    }
}

/// An iterator over the backends of a [Weighted] selection.
///
/// See [super::BackendSelection] for more information.
pub struct WeightedIterator<H> {
    // the unbounded index seed
    index: u64,
    backend: Arc<Weighted<H>>,
    first: bool,
}

impl<H: SelectionAlgorithm> WeightedIterator<H> {
    /// Constructs a new [WeightedIterator].
    fn new(input: &[u8], backend: Arc<Weighted<H>>) -> Self {
        Self {
            index: backend.algorithm.next(input),
            backend,
            first: true,
        }
    }
}

impl<H: SelectionAlgorithm> BackendIter for WeightedIterator<H> {
    fn next(&mut self) -> Option<&Backend> {
        if self.backend.backends.is_empty() {
            // short circuit if empty
            return None;
        }

        if self.first {
            // initial hash, select from the weighted list
            self.first = false;
            let len = self.backend.weighted.len();
            let index = self.backend.weighted[self.index as usize % len];
            Some(&self.backend.backends[index as usize])
        } else {
            // fallback, select from the unique list
            // deterministically select the next item
            self.index = self.backend.algorithm.next(&self.index.to_le_bytes());
            let len = self.backend.backends.len();
            Some(&self.backend.backends[self.index as usize % len])
        }
    }
}

#[cfg(test)]
mod test {
    use super::super::algorithms::*;
    use super::*;
    use std::collections::HashMap;

    #[test]
    fn test_fnv() {
        let b1 = Backend::new("1.1.1.1:80").unwrap();
        let mut b2 = Backend::new("1.0.0.1:80").unwrap();
        b2.weight = 10; // 10x than the rest
        let b3 = Backend::new("1.0.0.255:80").unwrap();
        let backends = BTreeSet::from_iter([b1.clone(), b2.clone(), b3.clone()]);
        let hash: Arc<Weighted> = Arc::new(Weighted::build(&backends));

        // same hash iter over
        let mut iter = hash.iter(b"test");
        // first, should be weighted
        assert_eq!(iter.next(), Some(&b2));
        // fallbacks, should be uniform, not weighted
        assert_eq!(iter.next(), Some(&b2));
        assert_eq!(iter.next(), Some(&b2));
        assert_eq!(iter.next(), Some(&b1));
        assert_eq!(iter.next(), Some(&b3));
        assert_eq!(iter.next(), Some(&b2));
        assert_eq!(iter.next(), Some(&b2));
        assert_eq!(iter.next(), Some(&b1));
        assert_eq!(iter.next(), Some(&b2));
        assert_eq!(iter.next(), Some(&b3));
        assert_eq!(iter.next(), Some(&b1));

        // different hashes, the first selection should be weighted
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test2");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test3");
        assert_eq!(iter.next(), Some(&b3));
        let mut iter = hash.iter(b"test4");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test5");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test6");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test7");
        assert_eq!(iter.next(), Some(&b2));
    }

    #[test]
    fn test_round_robin() {
        let b1 = Backend::new("1.1.1.1:80").unwrap();
        let mut b2 = Backend::new("1.0.0.1:80").unwrap();
        b2.weight = 8; // 8x than the rest
        let b3 = Backend::new("1.0.0.255:80").unwrap();
        // sorted with: [b2, b3, b1]
        // weighted: [0, 0, 0, 0, 0, 0, 0, 0, 1, 2]
        let backends = BTreeSet::from_iter([b1.clone(), b2.clone(), b3.clone()]);
        let hash: Arc<Weighted<RoundRobin>> = Arc::new(Weighted::build(&backends));

        // same hash iter over
        let mut iter = hash.iter(b"test");
        // first, should be weighted
        // weighted: [0, 0, 0, 0, 0, 0, 0, 0, 1, 2]
        //            ^
        assert_eq!(iter.next(), Some(&b2));
        // fallbacks, should be round robin
        assert_eq!(iter.next(), Some(&b3));
        assert_eq!(iter.next(), Some(&b1));
        assert_eq!(iter.next(), Some(&b2));
        assert_eq!(iter.next(), Some(&b3));

        // round robin, ignoring the hash key
        // index advanced 5 steps
        // weighted: [0, 0, 0, 0, 0, 0, 0, 0, 1, 2]
        //                           ^
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b3));
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b1));
        let mut iter = hash.iter(b"test1");
        // rounded
        assert_eq!(iter.next(), Some(&b2));
        let mut iter = hash.iter(b"test1");
        assert_eq!(iter.next(), Some(&b2));
    }

    #[test]
    fn test_random() {
        let b1 = Backend::new("1.1.1.1:80").unwrap();
        let mut b2 = Backend::new("1.0.0.1:80").unwrap();
        b2.weight = 8; // 8x than the rest
        let b3 = Backend::new("1.0.0.255:80").unwrap();
        let backends = BTreeSet::from_iter([b1.clone(), b2.clone(), b3.clone()]);
        let hash: Arc<Weighted<Random>> = Arc::new(Weighted::build(&backends));

        let mut count = HashMap::new();
        count.insert(b1.clone(), 0);
        count.insert(b2.clone(), 0);
        count.insert(b3.clone(), 0);

        for _ in 0..10000 {
            let mut iter = hash.iter(b"test");
            *count.get_mut(iter.next().unwrap()).unwrap() += 1;
        }
        let b2_count = *count.get(&b2).unwrap();
        assert!((7000..=9000).contains(&b2_count));
    }
}


================================================
FILE: pingora-lru/Cargo.toml
================================================
[package]
name = "pingora-lru"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["algorithms", "caching"]
keywords = ["lru", "cache", "pingora"]
description = """
LRU cache that focuses on memory efficiency, concurrency and persistence.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_lru"
path = "src/lib.rs"

[dependencies]
hashbrown = "0"
parking_lot = "0"
arrayvec = "0"
rand = "0.8"

[dev-dependencies]
lru = { workspace = true }

[[bench]]
name = "bench_linked_list"
harness = false

[[bench]]
name = "bench_lru"
harness = false


================================================
FILE: pingora-lru/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-lru/benches/bench_linked_list.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::time::Instant;

fn main() {
    const ITEMS: usize = 5_000_000;

    // push bench

    let mut std_list = std::collections::LinkedList::<u64>::new();
    let before = Instant::now();
    for _ in 0..ITEMS {
        std_list.push_front(0);
    }
    let elapsed = before.elapsed();
    println!(
        "std linked list push_front total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    let mut list = pingora_lru::linked_list::LinkedList::with_capacity(ITEMS);
    let before = Instant::now();
    for _ in 0..ITEMS {
        list.push_head(0);
    }
    let elapsed = before.elapsed();
    println!(
        "pingora linked list push_head total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    // iter bench

    let mut count = 0;
    let before = Instant::now();
    for _ in std_list.iter() {
        count += 1;
    }
    let elapsed = before.elapsed();
    println!(
        "std linked list iter total {count} {elapsed:?}, {:?} avg per operation",
        elapsed / count as u32
    );

    let mut count = 0;
    let before = Instant::now();
    for _ in list.iter() {
        count += 1;
    }
    let elapsed = before.elapsed();
    println!(
        "pingora linked list iter total {count} {elapsed:?}, {:?} avg per operation",
        elapsed / count as u32
    );

    // search bench

    let before = Instant::now();
    for _ in 0..ITEMS {
        assert!(!std_list.iter().take(10).any(|v| *v == 1));
    }
    let elapsed = before.elapsed();
    println!(
        "std linked search first 10 items total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    let before = Instant::now();
    for _ in 0..ITEMS {
        assert!(!list.iter().take(10).any(|v| *v == 1));
    }
    let elapsed = before.elapsed();
    println!(
        "pingora linked search first 10 items total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    let before = Instant::now();
    for _ in 0..ITEMS {
        assert!(!list.exist_near_head(1, 10));
    }
    let elapsed = before.elapsed();
    println!(
        "pingora linked optimized search first 10 items total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    // move node bench
    let before = Instant::now();
    for _ in 0..ITEMS {
        let value = std_list.pop_back().unwrap();
        std_list.push_front(value);
    }
    let elapsed = before.elapsed();
    println!(
        "std linked list move back to front total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    let before = Instant::now();
    for _ in 0..ITEMS {
        let index = list.tail().unwrap();
        list.promote(index);
    }
    let elapsed = before.elapsed();
    println!(
        "pingora linked list move tail to head total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    // pop bench

    let before = Instant::now();
    for _ in 0..ITEMS {
        std_list.pop_back();
    }
    let elapsed = before.elapsed();
    println!(
        "std linked list pop_back {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );

    let before = Instant::now();
    for _ in 0..ITEMS {
        list.pop_tail();
    }
    let elapsed = before.elapsed();
    println!(
        "pingora linked list pop_tail total {elapsed:?}, {:?} avg per operation",
        elapsed / ITEMS as u32
    );
}


================================================
FILE: pingora-lru/benches/bench_lru.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use rand::distributions::WeightedIndex;
use rand::prelude::*;
use std::sync::Arc;
use std::thread;
use std::time::Instant;

// Non-uniform distributions, 100 items, 10 of them are 100x more likely to appear
const WEIGHTS: &[usize] = &[
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 100, 100, 100,
    100, 100, 100, 100, 100, 100, 100,
];

const ITERATIONS: usize = 5_000_000;
const THREADS: usize = 8;

fn main() {
    let lru = parking_lot::Mutex::new(lru::LruCache::<u64, ()>::unbounded());

    let plru = pingora_lru::Lru::<(), 10>::with_capacity(1000, 100);
    // populate first, then we bench access/promotion
    for i in 0..WEIGHTS.len() {
        lru.lock().put(i as u64, ());
    }
    for i in 0..WEIGHTS.len() {
        plru.admit(i as u64, (), 1);
    }

    // single thread
    let mut rng = thread_rng();
    let dist = WeightedIndex::new(WEIGHTS).unwrap();

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        lru.lock().get(&(dist.sample(&mut rng) as u64));
    }
    let elapsed = before.elapsed();
    println!(
        "lru promote total {elapsed:?}, {:?} avg per operation",
        elapsed / ITERATIONS as u32
    );

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        plru.promote(dist.sample(&mut rng) as u64);
    }
    let elapsed = before.elapsed();
    println!(
        "pingora lru promote total {elapsed:?}, {:?} avg per operation",
        elapsed / ITERATIONS as u32
    );

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        plru.promote_top_n(dist.sample(&mut rng) as u64, 10);
    }
    let elapsed = before.elapsed();
    println!(
        "pingora lru promote_top_10 total {elapsed:?}, {:?} avg per operation",
        elapsed / ITERATIONS as u32
    );

    // concurrent

    let lru = Arc::new(lru);
    let mut handlers = vec![];
    for i in 0..THREADS {
        let lru = lru.clone();
        let handler = thread::spawn(move || {
            let mut rng = thread_rng();
            let dist = WeightedIndex::new(WEIGHTS).unwrap();
            let before = Instant::now();
            for _ in 0..ITERATIONS {
                lru.lock().get(&(dist.sample(&mut rng) as u64));
            }
            let elapsed = before.elapsed();
            println!(
                "lru promote total {elapsed:?}, {:?} avg per operation thread {i}",
                elapsed / ITERATIONS as u32
            );
        });
        handlers.push(handler);
    }
    for thread in handlers {
        thread.join().unwrap();
    }

    let plru = Arc::new(plru);

    let mut handlers = vec![];
    for i in 0..THREADS {
        let plru = plru.clone();
        let handler = thread::spawn(move || {
            let mut rng = thread_rng();
            let dist = WeightedIndex::new(WEIGHTS).unwrap();
            let before = Instant::now();
            for _ in 0..ITERATIONS {
                plru.promote(dist.sample(&mut rng) as u64);
            }
            let elapsed = before.elapsed();
            println!(
                "pingora lru promote total {elapsed:?}, {:?} avg per operation thread {i}",
                elapsed / ITERATIONS as u32
            );
        });
        handlers.push(handler);
    }
    for thread in handlers {
        thread.join().unwrap();
    }

    let mut handlers = vec![];
    for i in 0..THREADS {
        let plru = plru.clone();
        let handler = thread::spawn(move || {
            let mut rng = thread_rng();
            let dist = WeightedIndex::new(WEIGHTS).unwrap();
            let before = Instant::now();
            for _ in 0..ITERATIONS {
                plru.promote_top_n(dist.sample(&mut rng) as u64, 10);
            }
            let elapsed = before.elapsed();
            println!(
                "pingora lru promote_top_10 total {elapsed:?}, {:?} avg per operation thread {i}",
                elapsed / ITERATIONS as u32
            );
        });
        handlers.push(handler);
    }
    for thread in handlers {
        thread.join().unwrap();
    }
}


================================================
FILE: pingora-lru/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! An implementation of an LRU that focuses on memory efficiency, concurrency and persistence
//!
//! Features
//! - keys can have different sizes
//! - LRUs are sharded to avoid global locks.
//! - Memory layout and usage are optimized: small and no memory fragmentation

pub mod linked_list;

use linked_list::{LinkedList, LinkedListIter};

use hashbrown::HashMap;
use parking_lot::RwLock;
use std::sync::atomic::{AtomicUsize, Ordering};

/// The LRU with `N` shards
pub struct Lru<T, const N: usize> {
    units: [RwLock<LruUnit<T>>; N],
    weight: AtomicUsize,
    weight_limit: usize,
    len_watermark: Option<usize>,
    len: AtomicUsize,
    evicted_weight: AtomicUsize,
    evicted_len: AtomicUsize,
}

impl<T, const N: usize> Lru<T, N> {
    /// Create an [Lru] with the given weight limit and predicted capacity.
    ///
    /// The capacity is per shard (for simplicity). So the total capacity = capacity * N
    pub fn with_capacity(weight_limit: usize, capacity: usize) -> Self {
        Self::with_capacity_and_watermark(weight_limit, capacity, None)
    }

    /// Create an [Lru] with the given weight limit, predicted capacity and optional watermark
    ///
    /// The capacity is per shard (for simplicity). So the total capacity = capacity * N
    ///
    /// The watermark indicates at what count we should begin evicting and acts as a limit
    /// on the total number of allowed items.
    pub fn with_capacity_and_watermark(
        weight_limit: usize,
        capacity: usize,
        len_watermark: Option<usize>,
    ) -> Self {
        // use the unsafe code from ArrayVec just to init the array
        let mut units = arrayvec::ArrayVec::<_, N>::new();
        for _ in 0..N {
            units.push(RwLock::new(LruUnit::with_capacity(capacity)));
        }
        Lru {
            units: units.into_inner().map_err(|_| "").unwrap(),
            weight: AtomicUsize::new(0),
            weight_limit,
            len_watermark,
            len: AtomicUsize::new(0),
            evicted_weight: AtomicUsize::new(0),
            evicted_len: AtomicUsize::new(0),
        }
    }

    /// Admit the key value to the [Lru]
    ///
    /// Return the shard index which the asset is added to
    pub fn admit(&self, key: u64, data: T, weight: usize) -> usize {
        let shard = get_shard(key, N);
        let unit = &mut self.units[shard].write();

        // Make sure weight is positive otherwise eviction won't work
        // TODO: Probably should use NonZeroUsize instead
        let weight = weight.max(1);

        let old_weight = unit.admit(key, data, weight);
        if old_weight != weight {
            self.weight.fetch_add(weight, Ordering::Relaxed);
            if old_weight > 0 {
                self.weight.fetch_sub(old_weight, Ordering::Relaxed);
            } else {
                // Assume old_weight == 0 means a new item is admitted
                self.len.fetch_add(1, Ordering::Relaxed);
            }
        }
        shard
    }

    /// Increment the weight associated with a given key, up to an optional max weight.
    /// If a `max_weight` is provided, the weight cannot exceed this max weight. If the current
    /// weight is higher than the max, it will be capped to the max.
    ///
    /// Return the total new weight. 0 indicates the key did not exist.
    pub fn increment_weight(&self, key: u64, delta: usize, max_weight: Option<usize>) -> usize {
        let shard = get_shard(key, N);
        let unit = &mut self.units[shard].write();
        if let Some((old_weight, new_weight)) = unit.increment_weight(key, delta, max_weight) {
            if new_weight >= old_weight {
                self.weight
                    .fetch_add(new_weight - old_weight, Ordering::Relaxed);
            } else {
                self.weight
                    .fetch_sub(old_weight - new_weight, Ordering::Relaxed);
            }
            new_weight
        } else {
            0
        }
    }

    /// Promote the key to the head of the LRU
    ///
    /// Return `true` if the key exists.
    pub fn promote(&self, key: u64) -> bool {
        self.units[get_shard(key, N)].write().access(key)
    }

    /// Promote to the top n of the LRU
    ///
    /// This function is a bit more efficient in terms of reducing lock contention because it
    /// will acquire a write lock only if the key is outside top n but only acquires a read lock
    /// when the key is already in the top n.
    ///
    /// Return false if the item doesn't exist
    pub fn promote_top_n(&self, key: u64, top: usize) -> bool {
        let unit = &self.units[get_shard(key, N)];
        if !unit.read().need_promote(key, top) {
            return true;
        }
        unit.write().access(key)
    }

    /// Evict at most one item from the given shard
    ///
    /// Return the evicted asset and its size if there is anything to evict
    pub fn evict_shard(&self, shard: u64) -> Option<(T, usize)> {
        let evicted = self.units[get_shard(shard, N)].write().evict();
        if let Some((_, weight)) = evicted.as_ref() {
            self.weight.fetch_sub(*weight, Ordering::Relaxed);
            self.len.fetch_sub(1, Ordering::Relaxed);
            self.evicted_weight.fetch_add(*weight, Ordering::Relaxed);
            self.evicted_len.fetch_add(1, Ordering::Relaxed);
        }
        evicted
    }

    /// Evict the [Lru] until the overall weight is below the limit (or the configured watermark).
    ///
    /// Return a list of evicted items.
    ///
    /// The evicted items are randomly selected from all the shards.
    pub fn evict_to_limit(&self) -> Vec<(T, usize)> {
        let mut evicted = vec![];
        let mut initial_weight = self.weight();
        let mut initial_len = self.len();
        let mut shard_seed = rand::random(); // start from a random shard
        let mut empty_shard = 0;

        // Entries can be admitted or removed from the LRU by others during the loop below
        // Track initial size not to over evict due to entries admitted after the loop starts
        // self.weight() / self.len() is also used not to over evict
        // due to entries already removed by others
        while ((initial_weight > self.weight_limit && self.weight() > self.weight_limit)
            || self
                .len_watermark
                .is_some_and(|w| initial_len > w && self.len() > w))
            && empty_shard < N
        {
            if let Some(i) = self.evict_shard(shard_seed) {
                initial_weight -= i.1;
                initial_len = initial_len.saturating_sub(1);
                evicted.push(i)
            } else {
                empty_shard += 1;
            }
            // move on to the next shard
            shard_seed += 1;
        }
        evicted
    }

    /// Remove the given asset.
    pub fn remove(&self, key: u64) -> Option<(T, usize)> {
        let removed = self.units[get_shard(key, N)].write().remove(key);
        if let Some((_, weight)) = removed.as_ref() {
            self.weight.fetch_sub(*weight, Ordering::Relaxed);
            self.len.fetch_sub(1, Ordering::Relaxed);
        }
        removed
    }

    /// Insert the item to the tail of this LRU.
    ///
    /// Useful to recreate an LRU in most-to-least order
    pub fn insert_tail(&self, key: u64, data: T, weight: usize) -> bool {
        if self.units[get_shard(key, N)]
            .write()
            .insert_tail(key, data, weight)
        {
            self.weight.fetch_add(weight, Ordering::Relaxed);
            self.len.fetch_add(1, Ordering::Relaxed);
            true
        } else {
            false
        }
    }

    /// Check existence of a key without changing the order in LRU.
    pub fn peek(&self, key: u64) -> bool {
        self.units[get_shard(key, N)].read().peek(key).is_some()
    }

    /// Check the weight of a key without changing the order in LRU.
    pub fn peek_weight(&self, key: u64) -> Option<usize> {
        self.units[get_shard(key, N)].read().peek_weight(key)
    }

    /// Return the current total weight.
    pub fn weight(&self) -> usize {
        self.weight.load(Ordering::Relaxed)
    }

    /// Return the total weight of items evicted from this [Lru].
    pub fn evicted_weight(&self) -> usize {
        self.evicted_weight.load(Ordering::Relaxed)
    }

    /// Return the total count of items evicted from this [Lru].
    pub fn evicted_len(&self) -> usize {
        self.evicted_len.load(Ordering::Relaxed)
    }

    /// The number of items inside this [Lru].
    #[allow(clippy::len_without_is_empty)]
    pub fn len(&self) -> usize {
        self.len.load(Ordering::Relaxed)
    }

    /// Scan a shard with the given function F
    pub fn iter_for_each<F>(&self, shard: usize, f: F)
    where
        F: FnMut((&T, usize)),
    {
        assert!(shard < N);
        self.units[shard].read().iter().for_each(f);
    }

    /// Get the total number of shards
    pub const fn shards(&self) -> usize {
        N
    }

    /// Get the number of items inside a shard
    pub fn shard_len(&self, shard: usize) -> usize {
        self.units[shard].read().len()
    }

    /// Get the weight (total size) inside a shard
    pub fn shard_weight(&self, shard: usize) -> usize {
        self.units[shard].read().used_weight
    }
}

#[inline]
fn get_shard(key: u64, n_shards: usize) -> usize {
    (key % n_shards as u64) as usize
}

struct LruNode<T> {
    data: T,
    list_index: usize,
    weight: usize,
}

struct LruUnit<T> {
    lookup_table: HashMap<u64, Box<LruNode<T>>>,
    order: LinkedList,
    used_weight: usize,
}

impl<T> LruUnit<T> {
    fn with_capacity(capacity: usize) -> Self {
        LruUnit {
            lookup_table: HashMap::with_capacity(capacity),
            order: LinkedList::with_capacity(capacity),
            used_weight: 0,
        }
    }

    /// Peek data associated with key, if it exists.
    pub fn peek(&self, key: u64) -> Option<&T> {
        self.lookup_table.get(&key).map(|n| &n.data)
    }

    /// Peek weight associated with key, if it exists.
    pub fn peek_weight(&self, key: u64) -> Option<usize> {
        self.lookup_table.get(&key).map(|n| n.weight)
    }

    /// Admit into LRU, return old weight if there was any.
    pub fn admit(&mut self, key: u64, data: T, weight: usize) -> usize {
        if let Some(node) = self.lookup_table.get_mut(&key) {
            let old_weight = Self::adjust_weight(node, &mut self.used_weight, weight);
            node.data = data;
            self.order.promote(node.list_index);
            return old_weight;
        }
        self.used_weight += weight;
        let list_index = self.order.push_head(key);
        let node = Box::new(LruNode {
            data,
            list_index,
            weight,
        });
        self.lookup_table.insert(key, node);
        0
    }

    /// Increase the weight of an existing key. Returns the new weight or 0 if the key did not
    /// exist, along with the new weight (or 0).
    ///
    /// If a `max_weight` is provided, the weight cannot exceed this max weight. If the current
    /// weight is higher than the max, it will be capped to the max.
    pub fn increment_weight(
        &mut self,
        key: u64,
        delta: usize,
        max_weight: Option<usize>,
    ) -> Option<(usize, usize)> {
        if let Some(node) = self.lookup_table.get_mut(&key) {
            let new_weight =
                max_weight.map_or(node.weight + delta, |m| (node.weight + delta).min(m));
            let old_weight = Self::adjust_weight(node, &mut self.used_weight, new_weight);
            self.order.promote(node.list_index);
            return Some((old_weight, new_weight));
        }
        None
    }

    pub fn access(&mut self, key: u64) -> bool {
        if let Some(node) = self.lookup_table.get(&key) {
            self.order.promote(node.list_index);
            true
        } else {
            false
        }
    }

    // Check if a key is already in the top n most recently used nodes.
    // this is a heuristic to reduce write, which requires exclusive locks, for promotion,
    // especially on very populate nodes
    // NOTE: O(n) search here so limit needs to be small
    pub fn need_promote(&self, key: u64, limit: usize) -> bool {
        !self.order.exist_near_head(key, limit)
    }

    // try to evict 1 node
    pub fn evict(&mut self) -> Option<(T, usize)> {
        self.order.pop_tail().map(|key| {
            // unwrap is safe because we always insert in both the hashtable and the list
            let node = self.lookup_table.remove(&key).unwrap();
            self.used_weight -= node.weight;
            (node.data, node.weight)
        })
    }
    // TODO: scan the tail up to K elements to decide which ones to evict

    pub fn remove(&mut self, key: u64) -> Option<(T, usize)> {
        self.lookup_table.remove(&key).map(|node| {
            let list_key = self.order.remove(node.list_index);
            assert_eq!(key, list_key);
            self.used_weight -= node.weight;
            (node.data, node.weight)
        })
    }

    pub fn insert_tail(&mut self, key: u64, data: T, weight: usize) -> bool {
        if self.lookup_table.contains_key(&key) {
            return false;
        }
        let list_index = self.order.push_tail(key);
        let node = Box::new(LruNode {
            data,
            list_index,
            weight,
        });
        self.lookup_table.insert(key, node);
        self.used_weight += weight;
        true
    }

    pub fn len(&self) -> usize {
        assert_eq!(self.lookup_table.len(), self.order.len());
        self.lookup_table.len()
    }

    #[cfg(test)]
    pub fn used_weight(&self) -> usize {
        self.used_weight
    }

    pub fn iter(&self) -> LruUnitIter<'_, T> {
        LruUnitIter {
            unit: self,
            iter: self.order.iter(),
        }
    }

    // Adjusts node weight to the new given weight.
    // Returns old weight.
    #[inline]
    fn adjust_weight(node: &mut LruNode<T>, used_weight: &mut usize, weight: usize) -> usize {
        let old_weight = node.weight;
        if weight != old_weight {
            *used_weight += weight;
            *used_weight -= old_weight;
            node.weight = weight;
        }
        old_weight
    }
}

struct LruUnitIter<'a, T> {
    unit: &'a LruUnit<T>,
    iter: LinkedListIter<'a>,
}

impl<'a, T> Iterator for LruUnitIter<'a, T> {
    type Item = (&'a T, usize);

    fn next(&mut self) -> Option<Self::Item> {
        self.iter.next().map(|key| {
            // safe because we always items in table and list are always 1:1
            let node = self.unit.lookup_table.get(key).unwrap();
            (&node.data, node.weight)
        })
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.iter.size_hint()
    }
}

impl<T> DoubleEndedIterator for LruUnitIter<'_, T> {
    fn next_back(&mut self) -> Option<Self::Item> {
        self.iter.next_back().map(|key| {
            // safe because we always items in table and list are always 1:1
            let node = self.unit.lookup_table.get(key).unwrap();
            (&node.data, node.weight)
        })
    }
}

#[cfg(test)]
mod test_lru {
    use super::*;

    fn assert_lru<T: Copy + PartialEq + std::fmt::Debug, const N: usize>(
        lru: &Lru<T, N>,
        values: &[T],
        shard: usize,
    ) {
        let mut list_values = vec![];
        lru.iter_for_each(shard, |(v, _)| list_values.push(*v));
        assert_eq!(values, &list_values)
    }

    #[test]
    fn test_admit() {
        let lru = Lru::<_, 2>::with_capacity(30, 10);
        assert_eq!(lru.len(), 0);

        lru.admit(2, 2, 3);
        assert_eq!(lru.len(), 1);
        assert_eq!(lru.weight(), 3);

        lru.admit(2, 2, 1);
        assert_eq!(lru.len(), 1);
        assert_eq!(lru.weight(), 1);

        lru.admit(2, 2, 2); // admit again with different weight
        assert_eq!(lru.len(), 1);
        assert_eq!(lru.weight(), 2);

        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);

        assert_eq!(lru.weight(), 2 + 3 + 4);
        assert_eq!(lru.len(), 3);
    }

    #[test]
    fn test_promote() {
        let lru = Lru::<_, 2>::with_capacity(30, 10);

        lru.admit(2, 2, 2);
        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);
        lru.admit(5, 5, 5);
        lru.admit(6, 6, 6);
        assert_lru(&lru, &[6, 4, 2], 0);
        assert_lru(&lru, &[5, 3], 1);

        assert!(lru.promote(3));
        assert_lru(&lru, &[3, 5], 1);
        assert!(lru.promote(3));
        assert_lru(&lru, &[3, 5], 1);

        assert!(lru.promote(2));
        assert_lru(&lru, &[2, 6, 4], 0);

        assert!(!lru.promote(7)); // 7 doesn't exist
        assert_lru(&lru, &[2, 6, 4], 0);
        assert_lru(&lru, &[3, 5], 1);

        // promote 2 to top 1, already there
        assert!(lru.promote_top_n(2, 1));
        assert_lru(&lru, &[2, 6, 4], 0);

        // promote 4 to top 3, already there
        assert!(lru.promote_top_n(4, 3));
        assert_lru(&lru, &[2, 6, 4], 0);

        // promote 4 to top 2
        assert!(lru.promote_top_n(4, 2));
        assert_lru(&lru, &[4, 2, 6], 0);

        // promote 2 to top 1
        assert!(lru.promote_top_n(2, 1));
        assert_lru(&lru, &[2, 4, 6], 0);

        assert!(!lru.promote_top_n(7, 1)); // 7 doesn't exist
    }

    #[test]
    fn test_evict() {
        let lru = Lru::<_, 2>::with_capacity(14, 10);

        // same weight to make the random eviction less random
        lru.admit(2, 2, 2);
        lru.admit(3, 3, 2);
        lru.admit(4, 4, 4);
        lru.admit(5, 5, 4);
        lru.admit(6, 6, 2);
        lru.admit(7, 7, 2);

        assert_lru(&lru, &[6, 4, 2], 0);
        assert_lru(&lru, &[7, 5, 3], 1);

        assert_eq!(lru.weight(), 16);
        assert_eq!(lru.len(), 6);

        let evicted = lru.evict_to_limit();
        assert_eq!(lru.weight(), 14);
        assert_eq!(lru.len(), 5);
        assert_eq!(lru.evicted_weight(), 2);
        assert_eq!(lru.evicted_len(), 1);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].1, 2); //weight
        assert!(evicted[0].0 == 2 || evicted[0].0 == 3); //either 2 or 3 are evicted

        let lru = Lru::<_, 2>::with_capacity(6, 10);

        // same weight random eviction less random
        lru.admit(2, 2, 2);
        lru.admit(3, 3, 2);
        lru.admit(4, 4, 2);
        lru.admit(5, 5, 2);
        lru.admit(6, 6, 2);
        lru.admit(7, 7, 2);
        assert_eq!(lru.weight(), 12);
        assert_eq!(lru.len(), 6);

        let evicted = lru.evict_to_limit();
        // NOTE: there is a low chance this test would fail see the TODO in evict_to_limit
        assert_eq!(lru.weight(), 6);
        assert_eq!(lru.len(), 3);
        assert_eq!(lru.evicted_weight(), 6);
        assert_eq!(lru.evicted_len(), 3);
        assert_eq!(evicted.len(), 3);
    }

    #[test]
    fn test_increment_weight() {
        let lru = Lru::<_, 2>::with_capacity(6, 10);
        lru.admit(1, 1, 1);
        lru.increment_weight(1, 1, None);
        assert_eq!(lru.weight(), 1 + 1);

        lru.increment_weight(0, 1000, None);
        assert_eq!(lru.weight(), 1 + 1);

        lru.admit(2, 2, 2);
        lru.increment_weight(2, 2, None);
        assert_eq!(lru.weight(), 1 + 1 + 2 + 2);

        lru.increment_weight(2, 2, Some(3));
        assert_eq!(lru.weight(), 1 + 1 + 3);
    }

    #[test]
    fn test_remove() {
        let lru = Lru::<_, 2>::with_capacity(30, 10);
        lru.admit(2, 2, 2);
        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);
        lru.admit(5, 5, 5);
        lru.admit(6, 6, 6);

        assert_eq!(lru.weight(), 2 + 3 + 4 + 5 + 6);
        assert_eq!(lru.len(), 5);
        assert_lru(&lru, &[6, 4, 2], 0);
        assert_lru(&lru, &[5, 3], 1);

        let node = lru.remove(6).unwrap();
        assert_eq!(node.0, 6); // data
        assert_eq!(node.1, 6); // weight
        assert_eq!(lru.weight(), 2 + 3 + 4 + 5);
        assert_eq!(lru.len(), 4);
        assert_lru(&lru, &[4, 2], 0);

        let node = lru.remove(3).unwrap();
        assert_eq!(node.0, 3); // data
        assert_eq!(node.1, 3); // weight
        assert_eq!(lru.weight(), 2 + 4 + 5);
        assert_eq!(lru.len(), 3);
        assert_lru(&lru, &[5], 1);

        assert!(lru.remove(7).is_none());
    }

    #[test]
    fn test_peek() {
        let lru = Lru::<_, 2>::with_capacity(30, 10);
        lru.admit(2, 2, 2);
        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);

        assert!(lru.peek(4));
        assert!(lru.peek(3));
        assert!(lru.peek(2));

        assert_lru(&lru, &[4, 2], 0);
        assert_lru(&lru, &[3], 1);
    }

    #[test]
    fn test_insert_tail() {
        let lru = Lru::<_, 2>::with_capacity(30, 10);
        lru.admit(2, 2, 2);
        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);
        lru.admit(5, 5, 5);
        lru.admit(6, 6, 6);

        assert_eq!(lru.weight(), 2 + 3 + 4 + 5 + 6);
        assert_eq!(lru.len(), 5);
        assert_lru(&lru, &[6, 4, 2], 0);
        assert_lru(&lru, &[5, 3], 1);

        assert!(lru.insert_tail(7, 7, 7));
        assert_eq!(lru.weight(), 2 + 3 + 4 + 5 + 6 + 7);
        assert_eq!(lru.len(), 6);
        assert_lru(&lru, &[5, 3, 7], 1);

        // ignore existing ones
        assert!(!lru.insert_tail(6, 6, 7));
    }

    #[test]
    fn test_watermark_eviction() {
        const WEIGHT_LIMIT: usize = usize::MAX / 2;
        let lru = Lru::<u64, 2>::with_capacity_and_watermark(WEIGHT_LIMIT, 10, Some(4));

        // admit 6 items, each weight 1
        for k in [2u64, 3, 4, 5, 6, 7] {
            lru.admit(k, k, 1);
        }

        assert!(lru.weight() < WEIGHT_LIMIT);
        assert_eq!(lru.len(), 6);

        let evicted = lru.evict_to_limit();
        assert_eq!(lru.len(), 4);
        assert_eq!(evicted.len(), 2);
        assert_eq!(lru.evicted_len(), 2);
    }
}

#[cfg(test)]
mod test_lru_unit {
    use super::*;

    fn assert_lru<T: Copy + PartialEq + std::fmt::Debug>(lru: &LruUnit<T>, values: &[T]) {
        let list_values: Vec<_> = lru.iter().map(|(v, _)| *v).collect();
        assert_eq!(values, &list_values)
    }

    #[test]
    fn test_admit() {
        let mut lru = LruUnit::with_capacity(10);
        assert_eq!(lru.len(), 0);
        assert!(lru.peek(0).is_none());

        lru.admit(2, 2, 1);
        assert_eq!(lru.len(), 1);
        assert_eq!(lru.peek(2).unwrap(), &2);
        assert_eq!(lru.used_weight(), 1);

        lru.admit(2, 2, 2); // admit again with different weight
        assert_eq!(lru.used_weight(), 2);

        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);

        assert_eq!(lru.used_weight(), 2 + 3 + 4);
        assert_lru(&lru, &[4, 3, 2]);
    }

    #[test]
    fn test_access() {
        let mut lru = LruUnit::with_capacity(10);

        lru.admit(2, 2, 2);
        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);
        assert_lru(&lru, &[4, 3, 2]);

        assert!(lru.access(3));
        assert_lru(&lru, &[3, 4, 2]);
        assert!(lru.access(3));
        assert_lru(&lru, &[3, 4, 2]);
        assert!(lru.access(2));
        assert_lru(&lru, &[2, 3, 4]);

        assert!(!lru.access(5)); // 5 doesn't exist
        assert_lru(&lru, &[2, 3, 4]);

        assert!(!lru.need_promote(2, 1));
        assert!(lru.need_promote(3, 1));
        assert!(!lru.need_promote(4, 9999));
    }

    #[test]
    fn test_evict() {
        let mut lru = LruUnit::with_capacity(10);

        lru.admit(2, 2, 2);
        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);
        assert_lru(&lru, &[4, 3, 2]);

        assert!(lru.access(3));
        assert!(lru.access(3));
        assert!(lru.access(2));
        assert_lru(&lru, &[2, 3, 4]);

        assert_eq!(lru.used_weight(), 2 + 3 + 4);
        assert_eq!(lru.evict(), Some((4, 4)));
        assert_eq!(lru.used_weight(), 2 + 3);
        assert_lru(&lru, &[2, 3]);

        assert_eq!(lru.evict(), Some((3, 3)));
        assert_eq!(lru.used_weight(), 2);
        assert_lru(&lru, &[2]);

        assert_eq!(lru.evict(), Some((2, 2)));
        assert_eq!(lru.used_weight(), 0);
        assert_lru(&lru, &[]);

        assert_eq!(lru.evict(), None);
        assert_eq!(lru.used_weight(), 0);
        assert_lru(&lru, &[]);
    }

    #[test]
    fn test_increment_weight() {
        let mut lru = LruUnit::with_capacity(10);
        lru.admit(1, 1, 1);
        lru.increment_weight(1, 1, None);
        assert_eq!(lru.used_weight(), 1 + 1);

        lru.increment_weight(0, 1000, None);
        assert_eq!(lru.used_weight(), 1 + 1);

        lru.admit(2, 2, 2);
        lru.increment_weight(2, 2, None);
        assert_eq!(lru.used_weight(), 1 + 1 + 2 + 2);

        lru.admit(3, 3, 3);
        lru.increment_weight(3, 3, Some(5));
        assert_eq!(lru.used_weight(), 1 + 1 + 2 + 2 + 3 + 2);

        lru.increment_weight(3, 3, Some(3));
        assert_eq!(lru.used_weight(), 1 + 1 + 2 + 2 + 3);
    }

    #[test]
    fn test_remove() {
        let mut lru = LruUnit::with_capacity(10);

        lru.admit(2, 2, 2);
        lru.admit(3, 3, 3);
        lru.admit(4, 4, 4);
        lru.admit(5, 5, 5);
        assert_lru(&lru, &[5, 4, 3, 2]);

        assert!(lru.access(4));
        assert!(lru.access(3));
        assert!(lru.access(3));
        assert!(lru.access(2));
        assert_lru(&lru, &[2, 3, 4, 5]);

        assert_eq!(lru.used_weight(), 2 + 3 + 4 + 5);
        assert_eq!(lru.remove(2), Some((2, 2)));
        assert_eq!(lru.used_weight(), 3 + 4 + 5);
        assert_lru(&lru, &[3, 4, 5]);

        assert_eq!(lru.remove(4), Some((4, 4)));
        assert_eq!(lru.used_weight(), 3 + 5);
        assert_lru(&lru, &[3, 5]);

        assert_eq!(lru.remove(5), Some((5, 5)));
        assert_eq!(lru.used_weight(), 3);
        assert_lru(&lru, &[3]);

        assert_eq!(lru.remove(1), None);
        assert_eq!(lru.used_weight(), 3);
        assert_lru(&lru, &[3]);

        assert_eq!(lru.remove(3), Some((3, 3)));
        assert_eq!(lru.used_weight(), 0);
        assert_lru(&lru, &[]);
    }

    #[test]
    fn test_insert_tail() {
        let mut lru = LruUnit::with_capacity(10);
        assert_eq!(lru.len(), 0);
        assert!(lru.peek(0).is_none());

        assert!(lru.insert_tail(2, 2, 1));
        assert_eq!(lru.len(), 1);
        assert_eq!(lru.peek(2).unwrap(), &2);
        assert_eq!(lru.used_weight(), 1);

        assert!(!lru.insert_tail(2, 2, 2));
        assert!(lru.insert_tail(3, 3, 3));
        assert_eq!(lru.used_weight(), 1 + 3);
        assert_lru(&lru, &[2, 3]);

        assert!(lru.insert_tail(4, 4, 4));
        assert!(lru.insert_tail(5, 5, 5));
        assert_eq!(lru.used_weight(), 1 + 3 + 4 + 5);
        assert_lru(&lru, &[2, 3, 4, 5]);
    }
}


================================================
FILE: pingora-lru/src/linked_list.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Can't tell people you know Rust until you write a (doubly) linked list

//! Doubly linked list
//!
//! Features
//! - Preallocate consecutive memory, no memory fragmentation.
//! - No shrink function: for Lru cache that grows to a certain size but never shrinks.
//! - Relatively fast and efficient.

// inspired by clru::FixedSizeList (Élie!)

use std::mem::replace;

type Index = usize;
const NULL: Index = usize::MAX;
const HEAD: Index = 0;
const TAIL: Index = 1;
const OFFSET: usize = 2;

#[derive(Debug)]
struct Node {
    pub(crate) prev: Index,
    pub(crate) next: Index,
    pub(crate) data: u64,
}

// Functionally the same as vec![head, tail, data_nodes...] where head & tail are fixed and
// the rest data nodes can expand. Both head and tail can be accessed faster than using index
struct Nodes {
    // we use these sentinel nodes to guard the head and tail of the list so that list
    // manipulation is simpler (fewer if-else)
    head: Node,
    tail: Node,
    data_nodes: Vec<Node>,
}

impl Nodes {
    fn with_capacity(capacity: usize) -> Self {
        Nodes {
            head: Node {
                prev: NULL,
                next: TAIL,
                data: 0,
            },
            tail: Node {
                prev: HEAD,
                next: NULL,
                data: 0,
            },
            data_nodes: Vec::with_capacity(capacity),
        }
    }

    fn new_node(&mut self, data: u64) -> Index {
        const VEC_EXP_GROWTH_CAP: usize = 65536;
        let node = Node {
            prev: NULL,
            next: NULL,
            data,
        };
        // Constrain the growth of vec: vec always double its capacity when it needs to grow.
        // It could waste too much memory when it is already very large.
        // Here we limit the memory waste to 10% once it grows beyond the cap.
        // The amortized growth cost is O(n) beyond the max of the initially reserved capacity and
        // the cap. But this list is for limited sized LRU and we recycle released node, so
        // hopefully insertions are rare beyond certain sizes
        if self.data_nodes.capacity() > VEC_EXP_GROWTH_CAP
            && self.data_nodes.capacity() - self.data_nodes.len() < 2
        {
            self.data_nodes
                .reserve_exact(self.data_nodes.capacity() / 10)
        }
        self.data_nodes.push(node);
        self.data_nodes.len() - 1 + OFFSET
    }

    fn len(&self) -> usize {
        self.data_nodes.len()
    }

    fn head(&self) -> &Node {
        &self.head
    }

    fn tail(&self) -> &Node {
        &self.tail
    }
}

impl std::ops::Index<usize> for Nodes {
    type Output = Node;

    fn index(&self, index: usize) -> &Self::Output {
        match index {
            HEAD => &self.head,
            TAIL => &self.tail,
            _ => &self.data_nodes[index - OFFSET],
        }
    }
}

impl std::ops::IndexMut<usize> for Nodes {
    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
        match index {
            HEAD => &mut self.head,
            TAIL => &mut self.tail,
            _ => &mut self.data_nodes[index - OFFSET],
        }
    }
}

/// Doubly linked list
pub struct LinkedList {
    nodes: Nodes,
    free: Vec<Index>, // to keep track of freed node to be used again
}
// Panic when index used as parameters are invalid
// Index returned by push_* is always valid.
impl LinkedList {
    /// Create a [LinkedList] with the given predicted capacity.
    pub fn with_capacity(capacity: usize) -> Self {
        LinkedList {
            nodes: Nodes::with_capacity(capacity),
            free: vec![],
        }
    }

    // Allocate a new node and return its index
    // NOTE: this node is leaked if not used by caller
    fn new_node(&mut self, data: u64) -> Index {
        if let Some(index) = self.free.pop() {
            // have a free node, update its payload and return its index
            self.nodes[index].data = data;
            index
        } else {
            // create a new node
            self.nodes.new_node(data)
        }
    }

    /// How many nodes in the list
    #[allow(clippy::len_without_is_empty)]
    pub fn len(&self) -> usize {
        // exclude the 2 sentinels
        self.nodes.len() - self.free.len()
    }

    fn valid_index(&self, index: Index) -> bool {
        index != HEAD && index != TAIL && index < self.nodes.len() + OFFSET
        // TODO: check node prev/next not NULL
        // TODO: debug_check index not in self.free
    }

    fn node(&self, index: Index) -> Option<&Node> {
        if self.valid_index(index) {
            Some(&self.nodes[index])
        } else {
            None
        }
    }

    /// Peek into the list
    pub fn peek(&self, index: Index) -> Option<u64> {
        self.node(index).map(|n| n.data)
    }

    // safe because the index still needs to be in the range of the vec
    fn peek_unchecked(&self, index: Index) -> &u64 {
        &self.nodes[index].data
    }

    /// Whether the value exists closed (up to search_limit nodes) to the head of the list
    // It can be done via iter().take().find() but this is cheaper
    pub fn exist_near_head(&self, value: u64, search_limit: usize) -> bool {
        let mut current_node = HEAD;
        for _ in 0..search_limit {
            current_node = self.nodes[current_node].next;
            if current_node == TAIL {
                return false;
            }
            if self.nodes[current_node].data == value {
                return true;
            }
        }
        false
    }

    // put a node right after the node at `at`
    fn insert_after(&mut self, node_index: Index, at: Index) {
        assert!(at != TAIL && at != node_index); // can't insert after tail or to itself

        let next = replace(&mut self.nodes[at].next, node_index);

        let node = &mut self.nodes[node_index];
        node.next = next;
        node.prev = at;

        self.nodes[next].prev = node_index;
    }

    /// Put the data at the head of the list.
    pub fn push_head(&mut self, data: u64) -> Index {
        let new_node_index = self.new_node(data);
        self.insert_after(new_node_index, HEAD);
        new_node_index
    }

    /// Put the data at the tail of the list.
    pub fn push_tail(&mut self, data: u64) -> Index {
        let new_node_index = self.new_node(data);
        self.insert_after(new_node_index, self.nodes.tail().prev);
        new_node_index
    }

    // lift the node out of the linked list, to either delete it or insert to another place
    // NOTE: the node is leaked if not used by the caller
    fn lift(&mut self, index: Index) -> u64 {
        // can't touch the sentinels
        assert!(index != HEAD && index != TAIL);

        let node = &mut self.nodes[index];

        // zero out the pointers, useful in case we try to access a freed node
        let prev = replace(&mut node.prev, NULL);
        let next = replace(&mut node.next, NULL);
        let data = node.data;

        // make sure we are accessing a node in the list, not freed already
        assert!(prev != NULL && next != NULL);

        self.nodes[prev].next = next;
        self.nodes[next].prev = prev;

        data
    }

    /// Remove the node at the index, and return the value
    pub fn remove(&mut self, index: Index) -> u64 {
        self.free.push(index);
        self.lift(index)
    }

    /// Remove the tail of the list
    pub fn pop_tail(&mut self) -> Option<u64> {
        let data_tail = self.nodes.tail().prev;
        if data_tail == HEAD {
            None // empty list
        } else {
            Some(self.remove(data_tail))
        }
    }

    /// Put the node at the index to the head
    pub fn promote(&mut self, index: Index) {
        if self.nodes.head().next == index {
            return; // already head
        }
        self.lift(index);
        self.insert_after(index, HEAD);
    }

    fn next(&self, index: Index) -> Index {
        self.nodes[index].next
    }

    fn prev(&self, index: Index) -> Index {
        self.nodes[index].prev
    }

    /// Get the head of the list
    pub fn head(&self) -> Option<Index> {
        let data_head = self.nodes.head().next;
        if data_head == TAIL {
            None
        } else {
            Some(data_head)
        }
    }

    /// Get the tail of the list
    pub fn tail(&self) -> Option<Index> {
        let data_tail = self.nodes.tail().prev;
        if data_tail == HEAD {
            None
        } else {
            Some(data_tail)
        }
    }

    /// Iterate over the list
    pub fn iter(&self) -> LinkedListIter<'_> {
        LinkedListIter {
            list: self,
            head: HEAD,
            tail: TAIL,
            len: self.len(),
        }
    }
}

/// The iter over the list
pub struct LinkedListIter<'a> {
    list: &'a LinkedList,
    head: Index,
    tail: Index,
    len: usize,
}

impl<'a> Iterator for LinkedListIter<'a> {
    type Item = &'a u64;

    fn next(&mut self) -> Option<Self::Item> {
        let next_index = self.list.next(self.head);
        if next_index == TAIL || next_index == NULL {
            None
        } else {
            self.head = next_index;
            self.len -= 1;
            Some(self.list.peek_unchecked(next_index))
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        (self.len, Some(self.len))
    }
}

impl DoubleEndedIterator for LinkedListIter<'_> {
    fn next_back(&mut self) -> Option<Self::Item> {
        let prev_index = self.list.prev(self.tail);
        if prev_index == HEAD || prev_index == NULL {
            None
        } else {
            self.tail = prev_index;
            self.len -= 1;
            Some(self.list.peek_unchecked(prev_index))
        }
    }
}

#[cfg(test)]
mod test {
    use super::*;

    // assert the list is the same as `values`
    fn assert_list(list: &LinkedList, values: &[u64]) {
        let list_values: Vec<_> = list.iter().copied().collect();
        assert_eq!(values, &list_values)
    }

    fn assert_list_reverse(list: &LinkedList, values: &[u64]) {
        let list_values: Vec<_> = list.iter().rev().copied().collect();
        assert_eq!(values, &list_values)
    }

    #[test]
    fn test_insert() {
        let mut list = LinkedList::with_capacity(10);
        assert_eq!(list.len(), 0);
        assert!(list.node(2).is_none());
        assert_eq!(list.head(), None);
        assert_eq!(list.tail(), None);

        let index1 = list.push_head(2);
        assert_eq!(list.len(), 1);
        assert_eq!(list.peek(index1).unwrap(), 2);

        let index2 = list.push_head(3);
        assert_eq!(list.head(), Some(index2));
        assert_eq!(list.tail(), Some(index1));

        let index3 = list.push_tail(4);
        assert_eq!(list.head(), Some(index2));
        assert_eq!(list.tail(), Some(index3));

        assert_list(&list, &[3, 2, 4]);
        assert_list_reverse(&list, &[4, 2, 3]);
    }

    #[test]
    fn test_pop() {
        let mut list = LinkedList::with_capacity(10);
        list.push_head(2);
        list.push_head(3);
        list.push_tail(4);
        assert_list(&list, &[3, 2, 4]);
        assert_eq!(list.pop_tail(), Some(4));
        assert_eq!(list.pop_tail(), Some(2));
        assert_eq!(list.pop_tail(), Some(3));
        assert_eq!(list.pop_tail(), None);
    }

    #[test]
    fn test_promote() {
        let mut list = LinkedList::with_capacity(10);
        let index2 = list.push_head(2);
        let index3 = list.push_head(3);
        let index4 = list.push_tail(4);
        assert_list(&list, &[3, 2, 4]);

        list.promote(index3);
        assert_list(&list, &[3, 2, 4]);

        list.promote(index2);
        assert_list(&list, &[2, 3, 4]);

        list.promote(index4);
        assert_list(&list, &[4, 2, 3]);
    }

    #[test]
    fn test_exist_near_head() {
        let mut list = LinkedList::with_capacity(10);
        list.push_head(2);
        list.push_head(3);
        list.push_tail(4);
        assert_list(&list, &[3, 2, 4]);

        assert!(!list.exist_near_head(4, 1));
        assert!(!list.exist_near_head(4, 2));
        assert!(list.exist_near_head(4, 3));
        assert!(list.exist_near_head(4, 4));
        assert!(list.exist_near_head(4, 99999));
    }
}


================================================
FILE: pingora-memory-cache/Cargo.toml
================================================
[package]
name = "pingora-memory-cache"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["algorithms", "caching"]
keywords = ["async", "cache", "pingora"]
description = """
An async in-memory cache with cache stampede protection.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_memory_cache"
path = "src/lib.rs"

[dependencies]
TinyUFO = { version = "0.8.0", path = "../tinyufo" }
ahash = { workspace = true }
tokio = { workspace = true, features = ["sync"] }
async-trait = { workspace = true }
pingora-error = { version = "0.8.0", path = "../pingora-error" }
log = { workspace = true }
parking_lot = "0"
pingora-timeout = { version = "0.8.0", path = "../pingora-timeout" }

[dev-dependencies]
once_cell = { workspace = true }


================================================
FILE: pingora-memory-cache/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-memory-cache/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use ahash::RandomState;
use std::borrow::Borrow;
use std::hash::Hash;
use std::marker::PhantomData;
use std::time::{Duration, Instant};

use tinyufo::TinyUfo;

mod read_through;
pub use read_through::{Lookup, MultiLookup, RTCache};

#[derive(Debug, PartialEq, Eq)]
/// [CacheStatus] indicates the response type for a query.
pub enum CacheStatus {
    /// The key was found in the cache
    Hit,
    /// The key was not found.
    Miss,
    /// The key was found but it was expired.
    Expired,
    /// The key was not initially found but was found after awaiting a lock.
    LockHit,
    /// The returned value was expired but still returned. The [Duration] is
    /// how long it has been since its expiration time.
    Stale(Duration),
}

impl CacheStatus {
    /// Return the string representation for [CacheStatus].
    pub fn as_str(&self) -> &str {
        match self {
            Self::Hit => "hit",
            Self::Miss => "miss",
            Self::Expired => "expired",
            Self::LockHit => "lock_hit",
            Self::Stale(_) => "stale",
        }
    }

    /// Returns whether this status represents a cache hit.
    pub fn is_hit(&self) -> bool {
        match self {
            CacheStatus::Hit | CacheStatus::LockHit | CacheStatus::Stale(_) => true,
            CacheStatus::Miss | CacheStatus::Expired => false,
        }
    }

    /// Returns the stale duration if any
    pub fn stale(&self) -> Option<Duration> {
        match self {
            CacheStatus::Stale(time) => Some(*time),
            _ => None,
        }
    }
}

#[derive(Debug, Clone)]
struct Node<T: Clone> {
    pub value: T,
    expire_on: Option<Instant>,
}

impl<T: Clone> Node<T> {
    fn new(value: T, ttl: Option<Duration>) -> Self {
        let expire_on = match ttl {
            Some(t) => Instant::now().checked_add(t),
            None => None,
        };
        Node { value, expire_on }
    }

    fn will_expire_at(&self, time: &Instant) -> bool {
        self.stale_duration(time).is_some()
    }

    fn is_expired(&self) -> bool {
        self.will_expire_at(&Instant::now())
    }

    fn stale_duration(&self, time: &Instant) -> Option<Duration> {
        let expire_time = self.expire_on?;
        if &expire_time <= time {
            Some(time.duration_since(expire_time))
        } else {
            None
        }
    }
}

/// A high performant in-memory cache with S3-FIFO + TinyLFU
pub struct MemoryCache<K: Hash, T: Clone> {
    store: TinyUfo<u64, Node<T>>,
    _key_type: PhantomData<K>,
    pub(crate) hasher: RandomState,
}

impl<K: Hash, T: Clone + Send + Sync + 'static> MemoryCache<K, T> {
    /// Create a new [MemoryCache] with the given size.
    pub fn new(size: usize) -> Self {
        MemoryCache {
            store: TinyUfo::new(size, size),
            _key_type: PhantomData,
            hasher: RandomState::new(),
        }
    }

    /// Fetch the key and return its value in addition to a [CacheStatus].
    pub fn get<Q>(&self, key: &Q) -> (Option<T>, CacheStatus)
    where
        K: Borrow<Q>,
        Q: Hash + ?Sized,
    {
        let hashed_key = self.hasher.hash_one(key);

        if let Some(n) = self.store.get(&hashed_key) {
            if !n.is_expired() {
                (Some(n.value), CacheStatus::Hit)
            } else {
                (None, CacheStatus::Expired)
            }
        } else {
            (None, CacheStatus::Miss)
        }
    }

    /// Similar to [Self::get], fetch the key and return its value in addition to a
    /// [CacheStatus] but also return the value even if it is expired. When the
    /// value is expired, the [Duration] of how long it has been stale will
    /// also be returned.
    pub fn get_stale<Q>(&self, key: &Q) -> (Option<T>, CacheStatus)
    where
        K: Borrow<Q>,
        Q: Hash + ?Sized,
    {
        let hashed_key = self.hasher.hash_one(key);

        if let Some(n) = self.store.get(&hashed_key) {
            let stale_duration = n.stale_duration(&Instant::now());
            if let Some(stale_duration) = stale_duration {
                (Some(n.value), CacheStatus::Stale(stale_duration))
            } else {
                (Some(n.value), CacheStatus::Hit)
            }
        } else {
            (None, CacheStatus::Miss)
        }
    }

    /// Insert a key and value pair with an optional TTL into the cache.
    ///
    /// An item with zero TTL of zero will not be inserted.
    pub fn put<Q>(&self, key: &Q, value: T, ttl: Option<Duration>)
    where
        K: Borrow<Q>,
        Q: Hash + ?Sized,
    {
        if let Some(t) = ttl {
            if t.is_zero() {
                return;
            }
        }
        let hashed_key = self.hasher.hash_one(key);
        let node = Node::new(value, ttl);
        // weight is always 1 for now
        self.store.put(hashed_key, node, 1);
    }

    /// Remove a key from the cache if it exists.
    pub fn remove<Q>(&self, key: &Q)
    where
        K: Borrow<Q>,
        Q: Hash + ?Sized,
    {
        let hashed_key = self.hasher.hash_one(key);
        self.store.remove(&hashed_key);
    }

    pub(crate) fn force_put(&self, key: &K, value: T, ttl: Option<Duration>) {
        if let Some(t) = ttl {
            if t.is_zero() {
                return;
            }
        }
        let hashed_key = self.hasher.hash_one(key);
        let node = Node::new(value, ttl);
        // weight is always 1 for now
        self.store.force_put(hashed_key, node, 1);
    }

    /// This is equivalent to [MemoryCache::get] but for an arbitrary amount of keys.
    pub fn multi_get<'a, I, Q>(&self, keys: I) -> Vec<(Option<T>, CacheStatus)>
    where
        I: Iterator<Item = &'a Q>,
        Q: Hash + ?Sized + 'a,
        K: Borrow<Q> + 'a,
    {
        let mut resp = Vec::with_capacity(keys.size_hint().0);
        for key in keys {
            resp.push(self.get(key));
        }
        resp
    }

    /// Same as [MemoryCache::multi_get] but returns the keys that are missing from the cache.
    pub fn multi_get_with_miss<'a, I, Q>(
        &self,
        keys: I,
    ) -> (Vec<(Option<T>, CacheStatus)>, Vec<&'a Q>)
    where
        I: Iterator<Item = &'a Q>,
        Q: Hash + ?Sized + 'a,
        K: Borrow<Q> + 'a,
    {
        let mut resp = Vec::with_capacity(keys.size_hint().0);
        let mut missed = Vec::with_capacity(keys.size_hint().0 / 2);
        for key in keys {
            let (lookup, cache_status) = self.get(key);
            if lookup.is_none() {
                missed.push(key);
            }
            resp.push((lookup, cache_status));
        }
        (resp, missed)
    }

    // TODO: evict expired first
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::thread::sleep;

    #[test]
    fn test_get() {
        let cache: MemoryCache<i32, ()> = MemoryCache::new(10);
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
    }

    #[test]
    fn test_put_get() {
        let cache: MemoryCache<i32, i32> = MemoryCache::new(10);
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        cache.put(&1, 2, None);
        let (res, hit) = cache.get(&1);
        assert_eq!(res.unwrap(), 2);
        assert_eq!(hit, CacheStatus::Hit);
    }

    #[test]
    fn test_put_get_remove() {
        let cache: MemoryCache<i32, i32> = MemoryCache::new(10);
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        cache.put(&1, 2, None);
        cache.put(&3, 4, None);
        cache.put(&5, 6, None);
        let (res, hit) = cache.get(&1);
        assert_eq!(res.unwrap(), 2);
        assert_eq!(hit, CacheStatus::Hit);
        cache.remove(&1);
        cache.remove(&3);
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        let (res, hit) = cache.get(&3);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        let (res, hit) = cache.get(&5);
        assert_eq!(res.unwrap(), 6);
        assert_eq!(hit, CacheStatus::Hit);
    }

    #[test]
    fn test_get_expired() {
        let cache: MemoryCache<i32, i32> = MemoryCache::new(10);
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        cache.put(&1, 2, Some(Duration::from_secs(1)));
        sleep(Duration::from_millis(1100));
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Expired);
    }

    #[test]
    fn test_get_stale() {
        let cache: MemoryCache<i32, i32> = MemoryCache::new(10);
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        cache.put(&1, 2, Some(Duration::from_secs(1)));
        sleep(Duration::from_millis(1100));
        let (res, hit) = cache.get_stale(&1);
        assert_eq!(res.unwrap(), 2);
        // we slept 1100ms and the ttl is 1000ms
        assert!(hit.stale().unwrap() >= Duration::from_millis(100));
    }

    #[test]
    fn test_eviction() {
        let cache: MemoryCache<i32, i32> = MemoryCache::new(2);
        cache.put(&1, 2, None);
        cache.put(&2, 4, None);
        cache.put(&3, 6, None);
        let (res, hit) = cache.get(&1);
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        let (res, hit) = cache.get(&2);
        assert_eq!(res.unwrap(), 4);
        assert_eq!(hit, CacheStatus::Hit);
        let (res, hit) = cache.get(&3);
        assert_eq!(res.unwrap(), 6);
        assert_eq!(hit, CacheStatus::Hit);
    }

    #[test]
    fn test_multi_get() {
        let cache: MemoryCache<i32, i32> = MemoryCache::new(10);
        cache.put(&2, -2, None);
        let keys: Vec<i32> = vec![1, 2, 3];
        let resp = cache.multi_get(keys.iter());
        assert_eq!(resp[0].0, None);
        assert_eq!(resp[0].1, CacheStatus::Miss);
        assert_eq!(resp[1].0.unwrap(), -2);
        assert_eq!(resp[1].1, CacheStatus::Hit);
        assert_eq!(resp[2].0, None);
        assert_eq!(resp[2].1, CacheStatus::Miss);

        let (resp, missed) = cache.multi_get_with_miss(keys.iter());
        assert_eq!(resp[0].0, None);
        assert_eq!(resp[0].1, CacheStatus::Miss);
        assert_eq!(resp[1].0.unwrap(), -2);
        assert_eq!(resp[1].1, CacheStatus::Hit);
        assert_eq!(resp[2].0, None);
        assert_eq!(resp[2].1, CacheStatus::Miss);
        assert_eq!(missed[0], &1);
        assert_eq!(missed[1], &3);
    }

    #[test]
    fn test_get_with_mismatched_key() {
        let cache: MemoryCache<String, ()> = MemoryCache::new(10);
        let (res, hit) = cache.get("Hello");
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
    }

    #[test]
    fn test_put_get_with_mismatched_key() {
        let cache: MemoryCache<String, i32> = MemoryCache::new(10);
        let (res, hit) = cache.get("1");
        assert_eq!(res, None);
        assert_eq!(hit, CacheStatus::Miss);
        cache.put("1", 2, None);
        let (res, hit) = cache.get("1");
        assert_eq!(res.unwrap(), 2);
        assert_eq!(hit, CacheStatus::Hit);
    }
}


================================================
FILE: pingora-memory-cache/src/read_through.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! An async read through cache where cache misses are populated via the provided
//! async callback.

use super::{CacheStatus, MemoryCache};

use async_trait::async_trait;
use log::warn;
use parking_lot::RwLock;
use pingora_error::{Error, ErrorTrait};
use std::collections::HashMap;
use std::hash::Hash;
use std::marker::PhantomData;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::Semaphore;

struct CacheLock {
    pub lock_start: Instant,
    pub lock: Semaphore,
}

impl CacheLock {
    pub fn new_arc() -> Arc<Self> {
        Arc::new(CacheLock {
            lock: Semaphore::new(0),
            lock_start: Instant::now(),
        })
    }

    pub fn too_old(&self, age: Option<&Duration>) -> bool {
        match age {
            Some(t) => Instant::now() - self.lock_start > *t,
            None => false,
        }
    }
}

#[async_trait]
/// [Lookup] defines the caching behavior that the implementor needs. The `extra` field can be used
/// to define any additional metadata that the implementor uses to determine cache eligibility.
///
/// # Examples
///
/// ```ignore
/// use pingora_error::{ErrorTrait, Result};
/// use std::time::Duration;
///
/// struct MyLookup;
///
/// impl Lookup<usize, usize, ()> for MyLookup {
///     async fn lookup(
///         &self,
///         _key: &usize,
///         extra: Option<&()>,
///     ) -> Result<(usize, Option<Duration>), Box<dyn ErrorTrait + Send + Sync>> {
///         // Define your business logic here.
///         Ok(1, None)
///     }
/// }
/// ```
pub trait Lookup<K, T, S> {
    /// Return a value and an optional TTL for the given key.
    async fn lookup(
        key: &K,
        extra: Option<&S>,
    ) -> Result<(T, Option<Duration>), Box<dyn ErrorTrait + Send + Sync>>
    where
        K: 'async_trait,
        S: 'async_trait;
}

#[async_trait]
/// [MultiLookup] is similar to [Lookup]. Implement this trait if the system being queried support
/// looking up multiple keys in a single API call.
pub trait MultiLookup<K, T, S> {
    /// Like [Lookup::lookup] but for an arbitrary amount of keys.
    async fn multi_lookup(
        keys: &[&K],
        extra: Option<&S>,
    ) -> Result<Vec<(T, Option<Duration>)>, Box<dyn ErrorTrait + Send + Sync>>
    where
        K: 'async_trait,
        S: 'async_trait;
}

const LOOKUP_ERR_MSG: &str = "RTCache: lookup error";

/// A read-through in-memory cache on top of [MemoryCache]
///
/// Instead of providing a `put` function, [RTCache] requires a type which implements [Lookup] to
/// be automatically called during cache miss to populate the cache. This is useful when trying to
/// cache queries to external system such as DNS or databases.
///
/// Lookup coalescing is provided so that multiple concurrent lookups for the same key results
/// only in one lookup callback.
pub struct RTCache<K, T, CB, S>
where
    K: Hash + Send,
    T: Clone + Send,
{
    inner: MemoryCache<K, T>,
    _callback: PhantomData<CB>,
    lockers: RwLock<HashMap<u64, Arc<CacheLock>>>,
    lock_age: Option<Duration>,
    lock_timeout: Option<Duration>,
    phantom: PhantomData<S>,
}

impl<K, T, CB, S> RTCache<K, T, CB, S>
where
    K: Hash + Send,
    T: Clone + Send + Sync + 'static,
{
    /// Create a new [RTCache] of given size. `lock_age` defines how long a lock is valid for.
    /// `lock_timeout` is used to stop a lookup from holding on to the key for too long.
    pub fn new(size: usize, lock_age: Option<Duration>, lock_timeout: Option<Duration>) -> Self {
        RTCache {
            inner: MemoryCache::new(size),
            lockers: RwLock::new(HashMap::new()),
            _callback: PhantomData,
            lock_age,
            lock_timeout,
            phantom: PhantomData,
        }
    }
}

impl<K, T, CB, S> RTCache<K, T, CB, S>
where
    K: Hash + Send,
    T: Clone + Send + Sync + 'static,
    CB: Lookup<K, T, S>,
{
    /// Query the cache for a given value. If it exists and no TTL is configured initially, it will
    /// use the `ttl` value given.
    pub async fn get(
        &self,
        key: &K,
        ttl: Option<Duration>,
        extra: Option<&S>,
    ) -> (Result<T, Box<Error>>, CacheStatus) {
        let (result, cache_state) = self.inner.get(key);
        if let Some(result) = result {
            /* cache hit */
            return (Ok(result), cache_state);
        }

        let hashed_key = self.inner.hasher.hash_one(key);

        /* Cache miss, try to lock the lookup. Check if there is already a lookup */
        let my_lock = {
            let lockers = self.lockers.read();
            /* clone the Arc */
            lockers.get(&hashed_key).cloned()
        }; // read lock dropped

        /* try insert a cache lock into locker */
        let (my_write, my_read) = match my_lock {
            // TODO: use a union
            Some(lock) => {
                /* There is an ongoing lookup to the same key */
                if lock.too_old(self.lock_age.as_ref()) {
                    (None, None)
                } else {
                    (None, Some(lock))
                }
            }
            None => {
                let mut lockers = self.lockers.write();
                match lockers.get(&hashed_key) {
                    Some(lock) => {
                        /* another lookup to the same key got the write lock to locker first */
                        if lock.too_old(self.lock_age.as_ref()) {
                            (None, None)
                        } else {
                            (None, Some(lock.clone()))
                        }
                    }
                    None => {
                        let new_lock = CacheLock::new_arc();
                        let new_lock2 = new_lock.clone();
                        lockers.insert(hashed_key, new_lock2);
                        (Some(new_lock), None)
                    }
                } // write lock dropped
            }
        };

        if let Some(my_lock) = my_read {
            /* another task will do the lookup */

            /* if available_permits > 0, writer is done */
            if my_lock.lock.available_permits() == 0 {
                /* block here to wait for writer to finish lookup */
                let lock_fut = my_lock.lock.acquire();
                let timed_out = match self.lock_timeout {
                    Some(t) => pingora_timeout::timeout(t, lock_fut).await.is_err(),
                    None => {
                        let _ = lock_fut.await;
                        false
                    }
                };
                if timed_out {
                    let value = CB::lookup(key, extra).await;
                    return match value {
                        Ok((v, _ttl)) => (Ok(v), cache_state),
                        Err(e) => {
                            let mut err = Error::new_str(LOOKUP_ERR_MSG);
                            err.set_cause(e);
                            (Err(err), cache_state)
                        }
                    };
                }
            } // permit returned here

            let (result, cache_state) = self.inner.get(key);
            if let Some(result) = result {
                /* cache lock hit, slow as a miss */
                (Ok(result), CacheStatus::LockHit)
            } else {
                /* probably error happen during the actual lookup */
                warn!(
                    "RTCache: no result after read lock, cache status: {:?}",
                    cache_state
                );
                match CB::lookup(key, extra).await {
                    Ok((v, new_ttl)) => {
                        self.inner.force_put(key, v.clone(), new_ttl.or(ttl));
                        (Ok(v), cache_state)
                    }
                    Err(e) => {
                        let mut err = Error::new_str(LOOKUP_ERR_MSG);
                        err.set_cause(e);
                        (Err(err), cache_state)
                    }
                }
            }
        } else {
            /* this one will do the look up, either because it gets the write lock or the read
             * lock age is reached */
            let value = CB::lookup(key, extra).await;
            let ret = match value {
                Ok((v, new_ttl)) => {
                    /* Don't put() if lock ago too old, to avoid too many concurrent writes */
                    if my_write.is_some() {
                        self.inner.force_put(key, v.clone(), new_ttl.or(ttl));
                    }
                    (Ok(v), cache_state) // the original cache_state: Miss or Expired
                }
                Err(e) => {
                    let mut err = Error::new_str(LOOKUP_ERR_MSG);
                    err.set_cause(e);
                    (Err(err), cache_state)
                }
            };
            if let Some(my_write) = my_write {
                /* add permit so that reader can start. Any number of permits will do,
                 * since readers will return permits right away. */
                my_write.lock.add_permits(10);

                {
                    // remove the lock from locker
                    let mut lockers = self.lockers.write();
                    lockers.remove(&hashed_key);
                } // write lock dropped here
            }

            ret
        }
    }

    /// Similar to [Self::get], query the cache for a given value, but also returns the value even if the
    /// value is expired up to `stale_ttl`. If it is a cache miss or the value is stale more than
    /// the `stale_ttl`, a lookup will be performed to populate the cache.
    pub async fn get_stale(
        &self,
        key: &K,
        ttl: Option<Duration>,
        extra: Option<&S>,
        stale_ttl: Duration,
    ) -> (Result<T, Box<Error>>, CacheStatus) {
        let (result, cache_status) = self.inner.get_stale(key);
        if let Some(result) = result {
            let stale_duration = cache_status.stale();
            if stale_duration.unwrap_or(Duration::ZERO) <= stale_ttl {
                return (Ok(result), cache_status);
            }
        }
        let (res, status) = self.get(key, ttl, extra).await;
        (res, status)
    }
}

impl<K, T, CB, S> RTCache<K, T, CB, S>
where
    K: Hash + Clone + Send + Sync,
    T: Clone + Send + Sync + 'static,
    S: Clone + Send + Sync,
    CB: Lookup<K, T, S> + Sync + Send,
{
    /// Similar to [Self::get_stale], but when it returns the stale value, it also initiates a lookup
    /// in the background in order to refresh the value.
    ///
    /// Note that this function requires the [RTCache] to be static, which can be done by wrapping
    /// it with something like [once_cell::sync::Lazy].
    ///
    /// [once_cell::sync::Lazy]: https://docs.rs/once_cell/latest/once_cell/sync/struct.Lazy.html
    pub async fn get_stale_while_update(
        &'static self,
        key: &K,
        ttl: Option<Duration>,
        extra: Option<&S>,
        stale_ttl: Duration,
    ) -> (Result<T, Box<Error>>, CacheStatus) {
        let (result, cache_status) = self.get_stale(key, ttl, extra, stale_ttl).await;
        let key = key.clone();
        let extra = extra.cloned();
        if cache_status.stale().is_some() {
            tokio::spawn(async move {
                let _ = self.get(&key, ttl, extra.as_ref()).await;
            });
        }
        (result, cache_status)
    }
}

impl<K, T, CB, S> RTCache<K, T, CB, S>
where
    K: Hash + Send,
    T: Clone + Send + Sync + 'static,
    CB: MultiLookup<K, T, S>,
{
    /// Same behavior as [RTCache::get] but for an arbitrary amount of keys.
    ///
    /// If there are keys that are missing from the cache, `multi_lookup` is invoked to populate the
    /// cache before returning the final results. This is useful if your type supports batch
    /// queries.
    ///
    /// To avoid dead lock for the same key across concurrent `multi_get` calls,
    /// this function does not provide lookup coalescing.
    pub async fn multi_get<'a, I>(
        &self,
        keys: I,
        ttl: Option<Duration>,
        extra: Option<&S>,
    ) -> Result<Vec<(T, CacheStatus)>, Box<Error>>
    where
        I: Iterator<Item = &'a K>,
        K: 'a,
    {
        let size = keys.size_hint().0;
        let (hits, misses) = self.inner.multi_get_with_miss(keys);
        let mut final_results = Vec::with_capacity(size);
        let miss_results = if !misses.is_empty() {
            match CB::multi_lookup(&misses, extra).await {
                Ok(miss_results) => {
                    // assert! here to prevent index panic when building results,
                    // final_results has the full list of misses but miss_results might not
                    assert!(
                        miss_results.len() == misses.len(),
                        "multi_lookup() failed to return the matching number of results"
                    );
                    /* put the misses into cache */
                    for item in misses.iter().zip(miss_results.iter()) {
                        self.inner
                            .force_put(item.0, (item.1).0.clone(), (item.1).1.or(ttl));
                    }
                    miss_results
                }
                Err(e) => {
                    /* NOTE: we give up the hits when encounter lookup error */
                    let mut err = Error::new_str(LOOKUP_ERR_MSG);
                    err.set_cause(e);
                    return Err(err);
                }
            }
        } else {
            vec![] // to make the rest code simple, allocating one unused empty vec should be fine
        };
        /* fill in final_result */
        let mut n_miss = 0;
        for item in hits {
            match item.0 {
                Some(v) => final_results.push((v, item.1)),
                None => {
                    final_results // miss_results.len() === #None in result (asserted above)
                    .push((miss_results[n_miss].0.clone(), CacheStatus::Miss));
                    n_miss += 1;
                }
            }
        }
        Ok(final_results)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use atomic::AtomicI32;
    use std::sync::atomic;

    #[derive(Clone, Debug)]
    struct ExtraOpt {
        error: bool,
        empty: bool,
        delay_for: Option<Duration>,
        used: Arc<AtomicI32>,
    }

    struct TestCB();

    #[async_trait]
    impl Lookup<i32, i32, ExtraOpt> for TestCB {
        async fn lookup(
            _key: &i32,
            extra: Option<&ExtraOpt>,
        ) -> Result<(i32, Option<Duration>), Box<dyn ErrorTrait + Send + Sync>> {
            // this function returns #lookup_times
            let mut used = 0;
            if let Some(e) = extra {
                used = e.used.fetch_add(1, atomic::Ordering::Relaxed) + 1;
                if e.error {
                    return Err(Error::new_str("test error"));
                }
                if let Some(delay_for) = e.delay_for {
                    tokio::time::sleep(delay_for).await;
                }
            }
            Ok((used, None))
        }
    }

    #[async_trait]
    impl MultiLookup<i32, i32, ExtraOpt> for TestCB {
        async fn multi_lookup(
            keys: &[&i32],
            extra: Option<&ExtraOpt>,
        ) -> Result<Vec<(i32, Option<Duration>)>, Box<dyn ErrorTrait + Send + Sync>> {
            let mut resp = vec![];
            if let Some(extra) = extra {
                if extra.empty {
                    return Ok(resp);
                }
            }
            for key in keys {
                resp.push((**key, None));
            }
            Ok(resp)
        }
    }

    #[tokio::test]
    async fn test_basic_get() {
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        let opt = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let (res, hit) = cache.get(&1, None, opt.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Miss);
        let (res, hit) = cache.get(&1, None, opt.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Hit);
    }

    #[tokio::test]
    async fn test_basic_get_error() {
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        let opt1 = Some(ExtraOpt {
            error: true,
            empty: false,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let (res, hit) = cache.get(&-1, None, opt1.as_ref()).await;
        assert!(res.is_err());
        assert_eq!(hit, CacheStatus::Miss);
    }

    #[tokio::test]
    async fn test_concurrent_get() {
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        let cache = Arc::new(cache);
        let opt = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let cache_c = cache.clone();
        let opt1 = opt.clone();
        // concurrent gets, only 1 will call the callback
        let t1 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt1.as_ref()).await;
            res.unwrap()
        });
        let cache_c = cache.clone();
        let opt2 = opt.clone();
        let t2 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt2.as_ref()).await;
            res.unwrap()
        });
        let opt3 = opt.clone();
        let cache_c = cache.clone();
        let t3 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt3.as_ref()).await;
            res.unwrap()
        });
        let (r1, r2, r3) = tokio::join!(t1, t2, t3);
        assert_eq!(r1.unwrap(), 1);
        assert_eq!(r2.unwrap(), 1);
        assert_eq!(r3.unwrap(), 1);
    }

    #[tokio::test]
    async fn test_concurrent_get_error() {
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        let cache = Arc::new(cache);
        let cache_c = cache.clone();
        let opt1 = Some(ExtraOpt {
            error: true,
            empty: false,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let opt2 = opt1.clone();
        let opt3 = opt1.clone();
        // concurrent gets, only 1 will call the callback
        let t1 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&-1, None, opt1.as_ref()).await;
            res.is_err()
        });
        let cache_c = cache.clone();
        let t2 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&-1, None, opt2.as_ref()).await;
            res.is_err()
        });
        let cache_c = cache.clone();
        let t3 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&-1, None, opt3.as_ref()).await;
            res.is_err()
        });
        let (r1, r2, r3) = tokio::join!(t1, t2, t3);
        assert!(r1.unwrap());
        assert!(r2.unwrap());
        assert!(r3.unwrap());
    }

    #[tokio::test]
    async fn test_concurrent_get_different_value() {
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        let cache = Arc::new(cache);
        let opt1 = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let opt2 = opt1.clone();
        let opt3 = opt1.clone();
        let cache_c = cache.clone();
        // concurrent gets to different keys, no locks, all will call the cb
        let t1 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt1.as_ref()).await;
            res.unwrap()
        });
        let cache_c = cache.clone();
        let t2 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&3, None, opt2.as_ref()).await;
            res.unwrap()
        });
        let cache_c = cache.clone();
        let t3 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&5, None, opt3.as_ref()).await;
            res.unwrap()
        });
        let (r1, r2, r3) = tokio::join!(t1, t2, t3);
        // 1 lookup + 2 lookups + 3 lookups, order not matter
        assert_eq!(r1.unwrap() + r2.unwrap() + r3.unwrap(), 6);
    }

    #[tokio::test]
    async fn test_get_lock_age() {
        // 1 sec lock age
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> =
            RTCache::new(10, Some(Duration::from_secs(1)), None);
        let cache = Arc::new(cache);
        let counter = Arc::new(AtomicI32::new(0));
        let opt1 = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: Some(Duration::from_secs(2)),
            used: counter.clone(),
        });

        let opt2 = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: None,
            used: counter.clone(),
        });
        let opt3 = opt2.clone();
        let cache_c = cache.clone();
        // t1 will be delay for 2 sec
        let t1 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt1.as_ref()).await;
            res.unwrap()
        });
        // start t2 and t3 1.5 seconds later, since lock age is 1 sec, there will be no lock
        tokio::time::sleep(Duration::from_secs_f32(1.5)).await;
        let cache_c = cache.clone();
        let t2 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt2.as_ref()).await;
            res.unwrap()
        });
        let cache_c = cache.clone();
        let t3 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt3.as_ref()).await;
            res.unwrap()
        });
        let (r1, r2, r3) = tokio::join!(t1, t2, t3);
        // 1 lookup + 2 lookups + 3 lookups, order not matter
        assert_eq!(r1.unwrap() + r2.unwrap() + r3.unwrap(), 6);
    }

    #[tokio::test]
    async fn test_get_lock_timeout() {
        // 1 sec lock timeout
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> =
            RTCache::new(10, None, Some(Duration::from_secs(1)));
        let cache = Arc::new(cache);
        let counter = Arc::new(AtomicI32::new(0));
        let opt1 = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: Some(Duration::from_secs(2)),
            used: counter.clone(),
        });
        let opt2 = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: None,
            used: counter.clone(),
        });
        let opt3 = opt2.clone();
        let cache_c = cache.clone();
        // t1 will be delay for 2 sec
        let t1 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt1.as_ref()).await;
            res.unwrap()
        });
        // since lock timeout is 1 sec, t2 and t3 will do their own lookup after 1 sec
        let cache_c = cache.clone();
        let t2 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt2.as_ref()).await;
            res.unwrap()
        });
        let cache_c = cache.clone();
        let t3 = tokio::spawn(async move {
            let (res, _hit) = cache_c.get(&1, None, opt3.as_ref()).await;
            res.unwrap()
        });
        let (r1, r2, r3) = tokio::join!(t1, t2, t3);
        // 1 lookup + 2 lookups + 3 lookups, order not matter
        assert_eq!(r1.unwrap() + r2.unwrap() + r3.unwrap(), 6);
    }

    #[tokio::test]
    async fn test_multi_get() {
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        let counter = Arc::new(AtomicI32::new(0));
        let opt1 = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: Some(Duration::from_secs(2)),
            used: counter.clone(),
        });
        // make 1 a hit first
        let (res, hit) = cache.get(&1, None, opt1.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Miss);
        let (res, hit) = cache.get(&1, None, opt1.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Hit);
        // 1 hit 2 miss 3 miss
        let resp = cache
            .multi_get([1, 2, 3].iter(), None, opt1.as_ref())
            .await
            .unwrap();
        assert_eq!(resp[0].0, 1);
        assert_eq!(resp[0].1, CacheStatus::Hit);
        assert_eq!(resp[1].0, 2);
        assert_eq!(resp[1].1, CacheStatus::Miss);
        assert_eq!(resp[2].0, 3);
        assert_eq!(resp[2].1, CacheStatus::Miss);
        // all hits after a fetch
        let resp = cache
            .multi_get([1, 2, 3].iter(), None, opt1.as_ref())
            .await
            .unwrap();
        assert_eq!(resp[0].0, 1);
        assert_eq!(resp[0].1, CacheStatus::Hit);
        assert_eq!(resp[1].0, 2);
        assert_eq!(resp[1].1, CacheStatus::Hit);
        assert_eq!(resp[2].0, 3);
        assert_eq!(resp[2].1, CacheStatus::Hit);
    }

    #[tokio::test]
    #[should_panic(expected = "multi_lookup() failed to return the matching number of results")]
    async fn test_inconsistent_miss_results() {
        // force an empty result
        let opt1 = Some(ExtraOpt {
            error: false,
            empty: true,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        cache
            .multi_get([4, 5, 6].iter(), None, opt1.as_ref())
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn test_get_stale() {
        let ttl = Some(Duration::from_millis(100));
        let cache: RTCache<i32, i32, TestCB, ExtraOpt> = RTCache::new(10, None, None);
        let opt = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let (res, hit) = cache.get(&1, ttl, opt.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Miss);
        let (res, hit) = cache.get(&1, ttl, opt.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Hit);
        tokio::time::sleep(Duration::from_millis(150)).await;
        let (res, hit) = cache
            .get_stale(&1, ttl, opt.as_ref(), Duration::from_millis(1000))
            .await;
        assert_eq!(res.unwrap(), 1);
        assert!(hit.stale().is_some());

        let (res, hit) = cache
            .get_stale(&1, ttl, opt.as_ref(), Duration::from_millis(30))
            .await;
        assert_eq!(res.unwrap(), 2);
        assert_eq!(hit, CacheStatus::Expired);
    }

    #[tokio::test]
    async fn test_get_stale_while_update() {
        use once_cell::sync::Lazy;
        let ttl = Some(Duration::from_millis(100));
        static CACHE: Lazy<RTCache<i32, i32, TestCB, ExtraOpt>> =
            Lazy::new(|| RTCache::new(10, None, None));
        let opt = Some(ExtraOpt {
            error: false,
            empty: false,
            delay_for: None,
            used: Arc::new(AtomicI32::new(0)),
        });
        let (res, hit) = CACHE.get(&1, ttl, opt.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Miss);
        let (res, hit) = CACHE.get(&1, ttl, opt.as_ref()).await;
        assert_eq!(res.unwrap(), 1);
        assert_eq!(hit, CacheStatus::Hit);
        tokio::time::sleep(Duration::from_millis(150)).await;
        let (res, hit) = CACHE
            .get_stale_while_update(&1, ttl, opt.as_ref(), Duration::from_millis(1000))
            .await;
        assert_eq!(res.unwrap(), 1);
        assert!(hit.stale().is_some());

        // allow the background lookup to finish
        tokio::time::sleep(Duration::from_millis(10)).await;

        let (res, hit) = CACHE.get(&1, ttl, opt.as_ref()).await;
        assert_eq!(res.unwrap(), 2);
        assert_eq!(hit, CacheStatus::Hit);
    }
}


================================================
FILE: pingora-openssl/Cargo.toml
================================================
[package]
name = "pingora-openssl"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "tls", "ssl", "pingora"]
description = """
OpenSSL async APIs for Pingora.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_openssl"
path = "src/lib.rs"

[dependencies]
openssl-sys = "0.9"
openssl = { version = "0.10.72", features = ["vendored"] }
tokio-openssl = { version = "0.6" }
libc = "0.2.70"
foreign-types = { version = "0.3"}

[dev-dependencies]
tokio-test = "0.4"
tokio = { workspace = true, features = ["full"] }


================================================
FILE: pingora-openssl/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-openssl/src/ext.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use foreign_types::ForeignTypeRef;
use libc::*;
use openssl::error::ErrorStack;
use openssl::pkey::{HasPrivate, PKeyRef};
use openssl::ssl::{Ssl, SslAcceptor, SslRef};
use openssl::x509::store::X509StoreRef;
use openssl::x509::verify::X509VerifyParamRef;
use openssl::x509::X509Ref;
use openssl_sys::{
    SSL_ctrl, EVP_PKEY, SSL, SSL_CTRL_SET_GROUPS_LIST, SSL_CTRL_SET_VERIFY_CERT_STORE, X509,
    X509_VERIFY_PARAM,
};
use std::ffi::CString;
use std::os::raw;

fn cvt(r: c_long) -> Result<c_long, ErrorStack> {
    if r != 1 {
        Err(ErrorStack::get())
    } else {
        Ok(r)
    }
}

extern "C" {
    pub fn X509_VERIFY_PARAM_add1_host(
        param: *mut X509_VERIFY_PARAM,
        name: *const c_char,
        namelen: size_t,
    ) -> c_int;

    pub fn SSL_use_certificate(ssl: *mut SSL, cert: *mut X509) -> c_int;
    pub fn SSL_use_PrivateKey(ssl: *mut SSL, key: *mut EVP_PKEY) -> c_int;

    pub fn SSL_set_cert_cb(
        ssl: *mut SSL,
        cb: ::std::option::Option<
            unsafe extern "C" fn(ssl: *mut SSL, arg: *mut raw::c_void) -> raw::c_int,
        >,
        arg: *mut raw::c_void,
    );
}

/// Add name as an additional reference identifier that can match the peer's certificate
///
/// See [X509_VERIFY_PARAM_set1_host](https://www.openssl.org/docs/man3.1/man3/X509_VERIFY_PARAM_set1_host.html).
pub fn add_host(verify_param: &mut X509VerifyParamRef, host: &str) -> Result<(), ErrorStack> {
    if host.is_empty() {
        return Ok(());
    }
    unsafe {
        cvt(X509_VERIFY_PARAM_add1_host(
            verify_param.as_ptr(),
            host.as_ptr() as *const c_char,
            host.len(),
        ) as c_long)
        .map(|_| ())
    }
}

/// Set the verify cert store of `ssl`
///
/// See [SSL_set1_verify_cert_store](https://www.openssl.org/docs/man1.1.1/man3/SSL_set1_verify_cert_store.html).
pub fn ssl_set_verify_cert_store(
    ssl: &mut SslRef,
    cert_store: &X509StoreRef,
) -> Result<(), ErrorStack> {
    unsafe {
        cvt(SSL_ctrl(
            ssl.as_ptr(),
            SSL_CTRL_SET_VERIFY_CERT_STORE,
            1, // increase the ref count of X509Store so that ssl_ctx can outlive X509StoreRef
            cert_store.as_ptr() as *mut c_void,
        ))?;
    }
    Ok(())
}

/// Load the certificate into `ssl`
///
/// See [SSL_use_certificate](https://www.openssl.org/docs/man1.1.1/man3/SSL_use_certificate.html).
pub fn ssl_use_certificate(ssl: &mut SslRef, cert: &X509Ref) -> Result<(), ErrorStack> {
    unsafe {
        cvt(SSL_use_certificate(ssl.as_ptr(), cert.as_ptr()) as c_long)?;
    }
    Ok(())
}

/// Load the private key into `ssl`
///
/// See [SSL_use_certificate](https://www.openssl.org/docs/man1.1.1/man3/SSL_use_PrivateKey.html).
pub fn ssl_use_private_key<T>(ssl: &mut SslRef, key: &PKeyRef<T>) -> Result<(), ErrorStack>
where
    T: HasPrivate,
{
    unsafe {
        cvt(SSL_use_PrivateKey(ssl.as_ptr(), key.as_ptr()) as c_long)?;
    }
    Ok(())
}

/// Add the certificate into the cert chain of `ssl`
///
/// See [SSL_add1_chain_cert](https://www.openssl.org/docs/man1.1.1/man3/SSL_add1_chain_cert.html)
pub fn ssl_add_chain_cert(ssl: &mut SslRef, cert: &X509Ref) -> Result<(), ErrorStack> {
    const SSL_CTRL_CHAIN_CERT: i32 = 89;
    unsafe {
        cvt(SSL_ctrl(
            ssl.as_ptr(),
            SSL_CTRL_CHAIN_CERT,
            1, // increase the ref count of X509 so that ssl can outlive X509StoreRef
            cert.as_ptr() as *mut c_void,
        ))?;
    }
    Ok(())
}

/// Set renegotiation
///
/// This function is specific to BoringSSL. This function is noop for OpenSSL.
pub fn ssl_set_renegotiate_mode_freely(_ssl: &mut SslRef) {}

/// Set the curves/groups of `ssl`
///
/// See [set_groups_list](https://www.openssl.org/docs/manmaster/man3/SSL_CTX_set1_curves.html).
pub fn ssl_set_groups_list(ssl: &mut SslRef, groups: &str) -> Result<(), ErrorStack> {
    if groups.contains('\0') {
        return Err(ErrorStack::get());
    }
    let groups = CString::new(groups).map_err(|_| ErrorStack::get())?;
    unsafe {
        cvt(SSL_ctrl(
            ssl.as_ptr(),
            SSL_CTRL_SET_GROUPS_LIST,
            0,
            groups.as_ptr() as *mut c_void,
        ))?;
    }
    Ok(())
}

/// Set's whether a second keyshare to be sent in client hello when PQ is used.
///
/// This function is specific to BoringSSL. This function is noop for OpenSSL.
pub fn ssl_use_second_key_share(_ssl: &mut SslRef, _enabled: bool) {}

/// Clear the error stack
///
/// SSL calls should check and clear the OpenSSL error stack. But some calls fail to do so.
/// This causes the next unrelated SSL call to fail due to the leftover errors. This function allows
/// caller to clear the error stack before performing SSL calls to avoid this issue.
pub fn clear_error_stack() {
    let _ = ErrorStack::get();
}

/// Create a new [Ssl] from &[SslAcceptor]
///
/// this function is to unify the interface between this crate and [`pingora-boringssl`](https://docs.rs/pingora-boringssl)
pub fn ssl_from_acceptor(acceptor: &SslAcceptor) -> Result<Ssl, ErrorStack> {
    Ssl::new(acceptor.context())
}

/// Suspend the TLS handshake when a certificate is needed.
///
/// This function will cause tls handshake to pause and return the error: SSL_ERROR_WANT_X509_LOOKUP.
/// The caller should set the certificate and then call [unblock_ssl_cert()] before continue the
/// handshake on the tls connection.
pub fn suspend_when_need_ssl_cert(ssl: &mut SslRef) {
    unsafe {
        SSL_set_cert_cb(ssl.as_ptr(), Some(raw_cert_block), std::ptr::null_mut());
    }
}

/// Unblock a TLS handshake after the certificate is set.
///
/// The user should continue to call tls handshake after this function is called.
pub fn unblock_ssl_cert(ssl: &mut SslRef) {
    unsafe {
        SSL_set_cert_cb(ssl.as_ptr(), None, std::ptr::null_mut());
    }
}

// Just block the handshake
extern "C" fn raw_cert_block(_ssl: *mut openssl_sys::SSL, _arg: *mut c_void) -> c_int {
    -1
}

/// Whether the TLS error is SSL_ERROR_WANT_X509_LOOKUP
pub fn is_suspended_for_cert(error: &openssl::ssl::Error) -> bool {
    error.code().as_raw() == openssl_sys::SSL_ERROR_WANT_X509_LOOKUP
}

#[allow(clippy::mut_from_ref)]
/// Get a mutable SslRef ouf of SslRef, which is a missing functionality even when holding &mut SslStream
/// # Safety
/// the caller needs to make sure that they hold a &mut SslStream (or other types of mutable ref to the Ssl)
pub unsafe fn ssl_mut(ssl: &SslRef) -> &mut SslRef {
    SslRef::from_ptr_mut(ssl.as_ptr())
}

#[cfg(test)]
mod tests {
    use super::*;
    use openssl::ssl::{SslContextBuilder, SslMethod};

    #[test]
    fn test_ssl_set_groups_list() {
        let ctx_builder = SslContextBuilder::new(SslMethod::tls()).unwrap();
        let ssl = Ssl::new(&ctx_builder.build()).unwrap();
        let ssl_ref = unsafe { ssl_mut(&ssl) };

        // Valid input
        assert!(ssl_set_groups_list(ssl_ref, "P-256:P-384").is_ok());

        // Invalid input (contains null byte)
        assert!(ssl_set_groups_list(ssl_ref, "P-256\0P-384").is_err());
    }
}


================================================
FILE: pingora-openssl/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The OpenSSL API compatibility layer.
//!
//! This crate aims at making [openssl] APIs interchangeable with [boring](https://docs.rs/boring/latest/boring/).
//! In other words, this crate and [`pingora-boringssl`](https://docs.rs/pingora-boringssl) expose identical rust APIs.

#![warn(clippy::all)]

use openssl as ssl_lib;
pub use openssl_sys as ssl_sys;
pub use tokio_openssl as tokio_ssl;
pub mod ext;

// export commonly used libs
pub use ssl_lib::dh;
pub use ssl_lib::error;
pub use ssl_lib::hash;
pub use ssl_lib::nid;
pub use ssl_lib::pkey;
pub use ssl_lib::ssl;
pub use ssl_lib::x509;


================================================
FILE: pingora-pool/Cargo.toml
================================================
[package]
name = "pingora-pool"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["network-programming"]
keywords = ["async", "pooling", "pingora"]
description = """
A connection pool system for connection reuse.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_pool"
path = "src/lib.rs"

[dependencies]
tokio = { workspace = true, features = ["sync", "io-util"] }
thread_local = "1.0"
lru = { workspace = true }
log = { workspace = true }
parking_lot = "0.12"
crossbeam-queue = "0.3"
pingora-timeout = { version = "0.8.0", path = "../pingora-timeout" }

[dev-dependencies]
tokio-test = "0.4"


================================================
FILE: pingora-pool/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-pool/src/connection.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Generic connection pooling

use log::{debug, warn};
use parking_lot::{Mutex, RwLock};
use pingora_timeout::{sleep, timeout};
use std::collections::HashMap;
use std::io;
use std::sync::Arc;
use std::time::Duration;
use tokio::io::{AsyncRead, AsyncReadExt};
use tokio::sync::{oneshot, watch, Notify, OwnedMutexGuard};

use super::lru::Lru;

type GroupKey = u64;
#[cfg(unix)]
type ID = i32;
#[cfg(windows)]
type ID = usize;

/// the metadata of a connection
#[derive(Clone, Debug)]
pub struct ConnectionMeta {
    /// The group key. All connections under the same key are considered the same for connection reuse.
    pub key: GroupKey,
    /// The unique ID of a connection.
    pub id: ID,
}

impl ConnectionMeta {
    /// Create a new [ConnectionMeta]
    pub fn new(key: GroupKey, id: ID) -> Self {
        ConnectionMeta { key, id }
    }
}

struct PoolConnection<S> {
    pub notify_use: oneshot::Sender<bool>,
    pub connection: S,
}

impl<S> PoolConnection<S> {
    pub fn new(notify_use: oneshot::Sender<bool>, connection: S) -> Self {
        PoolConnection {
            notify_use,
            connection,
        }
    }

    pub fn release(self) -> S {
        // notify the idle watcher to release the connection
        let _ = self.notify_use.send(true);
        // wait for the watcher to release
        self.connection
    }
}

use crossbeam_queue::ArrayQueue;

/// A pool of exchangeable items
pub struct PoolNode<T> {
    connections: Mutex<HashMap<ID, T>>,
    // a small lock free queue to avoid lock contention
    hot_queue: ArrayQueue<(ID, T)>,
    // to avoid race between 2 evictions on the queue
    hot_queue_remove_lock: Mutex<()>,
    // TODO: store the GroupKey to avoid hash collision?
}

// Keep the queue size small because eviction is O(n) in the queue
const HOT_QUEUE_SIZE: usize = 16;

impl<T> PoolNode<T> {
    /// Create a new [PoolNode]
    pub fn new() -> Self {
        PoolNode {
            connections: Mutex::new(HashMap::new()),
            hot_queue: ArrayQueue::new(HOT_QUEUE_SIZE),
            hot_queue_remove_lock: Mutex::new(()),
        }
    }

    /// Get any item from the pool
    pub fn get_any(&self) -> Option<(ID, T)> {
        let hot_conn = self.hot_queue.pop();
        if hot_conn.is_some() {
            return hot_conn;
        }
        let mut connections = self.connections.lock();
        // find one connection, any connection will do
        let id = match connections.iter().next() {
            Some((k, _)) => *k, // OK to copy i32
            None => return None,
        };
        // unwrap is safe since we just found it
        let connection = connections.remove(&id).unwrap();
        /* NOTE: we don't resize or drop empty connections hashmap
         * We may want to do it if they consume too much memory
         * maybe we should use trees to save memory */
        Some((id, connection))
        // connections.lock released here
    }

    /// Insert an item with the given unique ID into the pool
    pub fn insert(&self, id: ID, conn: T) {
        if let Err(node) = self.hot_queue.push((id, conn)) {
            // hot queue is full
            let mut connections = self.connections.lock();
            connections.insert(node.0, node.1); // TODO: check dup
        }
    }

    /// Returns `true` if the pool node contains no connections in either the hot queue
    /// or the overflow hash map.
    ///
    /// # Concurrency note
    ///
    /// This check is not atomic across the two internal stores (`hot_queue` and
    /// `connections`). Between checking one and the other, a concurrent `insert` or
    /// `get_any` could change the state. This is acceptable because callers use
    /// `is_empty` only as a hint to attempt cleanup, and always re-verify under
    /// an exclusive (write) lock before actually removing the node from the parent
    /// pool HashMap. A false-negative simply defers cleanup to the next opportunity;
    /// a false-positive is largely mitigated by the re-check (see
    /// [`ConnectionPool::try_remove_empty_node`] for residual race-window analysis).
    pub fn is_empty(&self) -> bool {
        // Check the lock-free queue first (cheap atomic load) to avoid acquiring
        // the mutex in the common case where connections are present.
        self.hot_queue.is_empty() && self.connections.lock().is_empty()
    }

    // This function acquires 2 locks and iterates over the entire hot queue.
    // But it should be fine because remove() rarely happens on a busy PoolNode.
    /// Remove the item associated with the id from the pool. The item is returned
    /// if it is found and removed.
    pub fn remove(&self, id: ID) -> Option<T> {
        // check the table first as least recent used ones are likely there
        let removed = self.connections.lock().remove(&id);
        if removed.is_some() {
            return removed;
        } // lock drops here

        let _queue_lock = self.hot_queue_remove_lock.lock();
        // check the hot queue, note that the queue can be accessed in parallel by insert and get
        let max_len = self.hot_queue.len();
        for _ in 0..max_len {
            if let Some((conn_id, conn)) = self.hot_queue.pop() {
                if conn_id == id {
                    // this is the item, it is already popped
                    return Some(conn);
                } else {
                    // not this item, put back to hot queue, but it could also be full
                    self.insert(conn_id, conn);
                }
            } else {
                // other threads grab all the connections
                return None;
            }
        }
        None
        // _queue_lock drops here
    }
}

/// Connection pool
///
/// [ConnectionPool] holds reusable connections. A reusable connection is released to this pool to
/// be picked up by another user/request.
pub struct ConnectionPool<S> {
    // TODO: n-way pools to reduce lock contention
    pool: RwLock<HashMap<GroupKey, Arc<PoolNode<PoolConnection<S>>>>>,
    lru: Lru<ID, ConnectionMeta>,
}

impl<S> ConnectionPool<S> {
    /// Create a new [ConnectionPool] with a size limit.
    ///
    /// When a connection is released to this pool, the least recently used connection will be dropped.
    pub fn new(size: usize) -> Self {
        ConnectionPool {
            pool: RwLock::new(HashMap::with_capacity(size)), // this is oversized since some connections will have the same key
            lru: Lru::new(size),
        }
    }

    /* get or create and insert a pool node for the hash key */
    fn get_pool_node(&self, key: GroupKey) -> Arc<PoolNode<PoolConnection<S>>> {
        {
            let pool = self.pool.read();
            if let Some(v) = pool.get(&key) {
                return (*v).clone();
            }
        } // read lock released here

        {
            // write lock section
            let mut pool = self.pool.write();
            // check again since another task might have already added it
            if let Some(v) = pool.get(&key) {
                return (*v).clone();
            }
            let node = Arc::new(PoolNode::new());
            let node_ret = node.clone();
            pool.insert(key, node); // TODO: check dup
            node_ret
        }
    }

    /// Attempt to remove an empty [`PoolNode`] entry from the pool `HashMap`.
    ///
    /// This prevents unbounded growth of the pool map when many unique group keys
    /// are seen over the lifetime of the pool (e.g. connecting to many distinct
    /// upstreams). Without this cleanup, each unique `GroupKey` leaves an
    /// empty `PoolNode` behind even after all its connections are gone.
    ///
    /// The method acquires the pool write lock and re-checks emptiness to avoid
    /// removing a node that was concurrently repopulated between the caller's
    /// initial `is_empty()` hint and this write-lock acquisition.
    ///
    /// # Race window
    ///
    /// There is a narrow window where another thread could have called
    /// [`get_pool_node`] (obtaining a clone of the `Arc<PoolNode>`) just before
    /// we remove the entry. If that thread then inserts a connection into the
    /// now-orphaned node, the connection is dropped when the last `Arc` reference
    /// goes away. This is benign: the `oneshot::Sender` inside the dropped
    /// `PoolConnection` is also dropped, which resolves the corresponding
    /// `watch_use` receiver in `idle_poll`/`idle_timeout`, causing a clean exit.
    /// The next request to the same upstream simply creates a fresh connection.
    /// This trade-off matches the existing concurrency model of the pool and is
    /// consistent with how hyper-util and Go's `net/http` handle this case.
    fn try_remove_empty_node(&self, key: GroupKey) {
        let mut pool = self.pool.write();
        if let Some(node) = pool.get(&key) {
            if node.is_empty() {
                pool.remove(&key);
            }
        }
    }

    // only remove from the pool because lru already removed it
    fn pop_evicted(&self, meta: &ConnectionMeta) {
        let pool_node = {
            let pool = self.pool.read();
            match pool.get(&meta.key) {
                Some(v) => (*v).clone(),
                None => {
                    warn!("Fail to get pool node for {:?}", meta);
                    return;
                } // nothing to pop, should return error?
            }
        }; // read lock released here

        pool_node.remove(meta.id);
        debug!("evict fd: {} from key {}", meta.id, meta.key);

        // Clean up the PoolNode entry if it is now empty, to prevent unbounded
        // growth of the pool HashMap.
        // The is_empty() check avoids acquiring the write lock in the common case
        // where other connections still exist under this key.
        if pool_node.is_empty() {
            self.try_remove_empty_node(meta.key);
        }
    }

    pub fn pop_closed(&self, meta: &ConnectionMeta) {
        // NOTE: which of these should be done first?
        self.pop_evicted(meta);
        self.lru.pop(&meta.id);
    }

    /// Get a connection from this pool under the same group key
    pub fn get(&self, key: &GroupKey) -> Option<S> {
        let pool_node = {
            let pool = self.pool.read();
            match pool.get(key) {
                Some(v) => (*v).clone(),
                None => return None,
            }
        }; // read lock released here

        if let Some((id, connection)) = pool_node.get_any() {
            self.lru.pop(&id); // the notified is not needed

            // Clean up the now-empty node. This path is important because when a
            // connection is retrieved (not evicted), the idle_poll/idle_timeout
            // tasks exit via the watch_use channel and never call pop_closed(),
            // so pop_evicted's cleanup would never run for this key.
            if pool_node.is_empty() {
                self.try_remove_empty_node(*key);
            }

            Some(connection.release())
        } else {
            // The node exists but has no connections. Clean it up.
            self.try_remove_empty_node(*key);
            None
        }
    }

    /// Release a connection to this pool for reuse
    ///
    /// - The returned [`Arc<Notify>`] will notify any listen when the connection is evicted from the pool.
    /// - The returned [`oneshot::Receiver<bool>`] will notify when the connection is being picked up by [Self::get()].
    pub fn put(
        &self,
        meta: &ConnectionMeta,
        connection: S,
    ) -> (Arc<Notify>, oneshot::Receiver<bool>) {
        let (notify_close, replaced) = self.lru.add(meta.id, meta.clone());
        if let Some(meta) = replaced {
            self.pop_evicted(&meta);
        };
        let pool_node = self.get_pool_node(meta.key);
        let (notify_use, watch_use) = oneshot::channel();
        let connection = PoolConnection::new(notify_use, connection);
        pool_node.insert(meta.id, connection);
        (notify_close, watch_use)
    }

    /// Actively monitor the health of a connection that is already released to this pool
    ///
    /// When the connection breaks, or the optional `timeout` is reached this function will
    /// remove it from the pool and drop the connection.
    ///
    /// If the connection is reused via [Self::get()] or being evicted, this function will just exit.
    pub async fn idle_poll<Stream>(
        &self,
        connection: OwnedMutexGuard<Stream>,
        meta: &ConnectionMeta,
        timeout: Option<Duration>,
        notify_evicted: Arc<Notify>,
        watch_use: oneshot::Receiver<bool>,
    ) where
        Stream: AsyncRead + Unpin + Send,
    {
        let read_result = tokio::select! {
            biased;
            _ = watch_use => {
                debug!("idle connection is being picked up");
                return
            },
            _ = notify_evicted.notified() => {
                debug!("idle connection is being evicted");
                // TODO: gracefully close the connection?
                return
            }
            read_result = read_with_timeout(connection , timeout) => read_result
        };

        match read_result {
            Ok(n) => {
                if n > 0 {
                    warn!("Data received on idle client connection, close it")
                } else {
                    debug!("Peer closed the idle connection or timeout")
                }
            }

            Err(e) => {
                debug!("error with the idle connection, close it {:?}", e);
            }
        }
        // connection terminated from either peer or timer
        self.pop_closed(meta);
    }

    /// Passively wait to close the connection after the timeout
    ///
    /// If this connection is not being picked up or evicted before the timeout is reach, this
    /// function will remove it from the pool and close the connection.
    pub async fn idle_timeout(
        &self,
        meta: &ConnectionMeta,
        timeout: Option<Duration>,
        notify_evicted: Arc<Notify>,
        mut notify_closed: watch::Receiver<bool>,
        watch_use: oneshot::Receiver<bool>,
    ) {
        tokio::select! {
            biased;
            _ = watch_use => {
                debug!("idle connection is being picked up");
            },
            _ = notify_evicted.notified() => {
                debug!("idle connection is being evicted");
                // TODO: gracefully close the connection?
            }
            _ = notify_closed.changed() => {
                // assume always changed from false to true
                debug!("idle connection is being closed");
                self.pop_closed(meta);
            }
            // async expression is evaluated if timeout is None but it's never polled, set it to MAX
            _ = sleep(timeout.unwrap_or(Duration::MAX)), if timeout.is_some() => {
                debug!("idle connection is being evicted");
                self.pop_closed(meta);
            }
        };
    }
}

async fn read_with_timeout<S>(
    mut connection: OwnedMutexGuard<S>,
    timeout_duration: Option<Duration>,
) -> io::Result<usize>
where
    S: AsyncRead + Unpin + Send,
{
    let mut buf = [0; 1];
    let read_event = connection.read(&mut buf[..]);
    match timeout_duration {
        Some(d) => match timeout(d, read_event).await {
            Ok(res) => res,
            Err(e) => {
                debug!("keepalive timeout {:?} reached, {:?}", d, e);
                Ok(0)
            }
        },
        _ => read_event.await,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use log::debug;
    use tokio::sync::Mutex as AsyncMutex;
    use tokio_test::io::{Builder, Mock};

    #[tokio::test]
    async fn test_lookup() {
        let meta1 = ConnectionMeta::new(101, 1);
        let value1 = "v1".to_string();
        let meta2 = ConnectionMeta::new(102, 2);
        let value2 = "v2".to_string();
        let meta3 = ConnectionMeta::new(101, 3);
        let value3 = "v3".to_string();
        let cp: ConnectionPool<String> = ConnectionPool::new(3); //#CP3
        cp.put(&meta1, value1.clone());
        cp.put(&meta2, value2.clone());
        cp.put(&meta3, value3.clone());

        let found_b = cp.get(&meta2.key).unwrap();
        assert_eq!(found_b, value2);

        let found_a1 = cp.get(&meta1.key).unwrap();
        let found_a2 = cp.get(&meta1.key).unwrap();

        assert!(
            found_a1 == value1 && found_a2 == value3 || found_a2 == value1 && found_a1 == value3
        );
    }

    #[tokio::test]
    async fn test_pop() {
        let meta1 = ConnectionMeta::new(101, 1);
        let value1 = "v1".to_string();
        let meta2 = ConnectionMeta::new(102, 2);
        let value2 = "v2".to_string();
        let meta3 = ConnectionMeta::new(101, 3);
        let value3 = "v3".to_string();
        let cp: ConnectionPool<String> = ConnectionPool::new(3); //#CP3
        cp.put(&meta1, value1);
        cp.put(&meta2, value2);
        cp.put(&meta3, value3.clone());

        cp.pop_closed(&meta1);

        let found_a1 = cp.get(&meta1.key).unwrap();
        assert_eq!(found_a1, value3);

        cp.pop_closed(&meta1);
        assert!(cp.get(&meta1.key).is_none())
    }

    #[tokio::test]
    async fn test_eviction() {
        let meta1 = ConnectionMeta::new(101, 1);
        let value1 = "v1".to_string();
        let meta2 = ConnectionMeta::new(102, 2);
        let value2 = "v2".to_string();
        let meta3 = ConnectionMeta::new(101, 3);
        let value3 = "v3".to_string();
        let cp: ConnectionPool<String> = ConnectionPool::new(2);
        let (notify_close1, _) = cp.put(&meta1, value1.clone());
        let (notify_close2, _) = cp.put(&meta2, value2.clone());
        let (notify_close3, _) = cp.put(&meta3, value3.clone()); // meta 1 should be evicted

        let closed_item = tokio::select! {
            _ = notify_close1.notified() => {debug!("notifier1"); 1},
            _ = notify_close2.notified() => {debug!("notifier2"); 2},
            _ = notify_close3.notified() => {debug!("notifier3"); 3},
        };
        assert_eq!(closed_item, 1);

        let found_a1 = cp.get(&meta1.key).unwrap();
        assert_eq!(found_a1, value3);
        assert_eq!(cp.get(&meta1.key), None)
    }

    #[tokio::test]
    #[should_panic(expected = "There is still data left to read.")]
    async fn test_read_close() {
        let meta1 = ConnectionMeta::new(101, 1);
        let mock_io1 = Arc::new(AsyncMutex::new(Builder::new().read(b"garbage").build()));
        let meta2 = ConnectionMeta::new(102, 2);
        let mock_io2 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let meta3 = ConnectionMeta::new(101, 3);
        let mock_io3 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let cp: ConnectionPool<Arc<AsyncMutex<Mock>>> = ConnectionPool::new(3);
        let (c1, u1) = cp.put(&meta1, mock_io1.clone());
        let (c2, u2) = cp.put(&meta2, mock_io2.clone());
        let (c3, u3) = cp.put(&meta3, mock_io3.clone());

        let closed_item = tokio::select! {
            _ = cp.idle_poll(mock_io1.try_lock_owned().unwrap(), &meta1, None, c1, u1) => {debug!("notifier1"); 1},
            _ = cp.idle_poll(mock_io2.try_lock_owned().unwrap(), &meta1, None, c2, u2) => {debug!("notifier2"); 2},
            _ = cp.idle_poll(mock_io3.try_lock_owned().unwrap(), &meta1, None, c3, u3) => {debug!("notifier3"); 3},
        };
        assert_eq!(closed_item, 1);

        let _ = cp.get(&meta1.key).unwrap(); // mock_io3 should be selected
        assert!(cp.get(&meta1.key).is_none()) // mock_io1 should already be removed by idle_poll
    }

    #[tokio::test]
    async fn test_read_timeout() {
        let meta1 = ConnectionMeta::new(101, 1);
        let mock_io1 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let meta2 = ConnectionMeta::new(102, 2);
        let mock_io2 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let meta3 = ConnectionMeta::new(101, 3);
        let mock_io3 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let cp: ConnectionPool<Arc<AsyncMutex<Mock>>> = ConnectionPool::new(3);
        let (c1, u1) = cp.put(&meta1, mock_io1.clone());
        let (c2, u2) = cp.put(&meta2, mock_io2.clone());
        let (c3, u3) = cp.put(&meta3, mock_io3.clone());

        let closed_item = tokio::select! {
            _ = cp.idle_poll(mock_io1.try_lock_owned().unwrap(), &meta1, Some(Duration::from_secs(1)), c1, u1) => {debug!("notifier1"); 1},
            _ = cp.idle_poll(mock_io2.try_lock_owned().unwrap(), &meta1, Some(Duration::from_secs(2)), c2, u2) => {debug!("notifier2"); 2},
            _ = cp.idle_poll(mock_io3.try_lock_owned().unwrap(), &meta1, Some(Duration::from_secs(3)), c3, u3) => {debug!("notifier3"); 3},
        };
        assert_eq!(closed_item, 1);

        let _ = cp.get(&meta1.key).unwrap(); // mock_io3 should be selected
        assert!(cp.get(&meta1.key).is_none()) // mock_io1 should already be removed by idle_poll
    }

    #[tokio::test]
    async fn test_evict_poll() {
        let meta1 = ConnectionMeta::new(101, 1);
        let mock_io1 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let meta2 = ConnectionMeta::new(102, 2);
        let mock_io2 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let meta3 = ConnectionMeta::new(101, 3);
        let mock_io3 = Arc::new(AsyncMutex::new(
            Builder::new().wait(Duration::from_secs(99)).build(),
        ));
        let cp: ConnectionPool<Arc<AsyncMutex<Mock>>> = ConnectionPool::new(2);
        let (c1, u1) = cp.put(&meta1, mock_io1.clone());
        let (c2, u2) = cp.put(&meta2, mock_io2.clone());
        let (c3, u3) = cp.put(&meta3, mock_io3.clone()); // 1 should be evicted at this point

        let closed_item = tokio::select! {
            _ = cp.idle_poll(mock_io1.try_lock_owned().unwrap(), &meta1, None, c1, u1) => {debug!("notifier1"); 1},
            _ = cp.idle_poll(mock_io2.try_lock_owned().unwrap(), &meta1, None, c2, u2) => {debug!("notifier2"); 2},
            _ = cp.idle_poll(mock_io3.try_lock_owned().unwrap(), &meta1, None, c3, u3) => {debug!("notifier3"); 3},
        };
        assert_eq!(closed_item, 1);

        let _ = cp.get(&meta1.key).unwrap(); // mock_io3 should be selected
        assert!(cp.get(&meta1.key).is_none()) // mock_io1 should already be removed by idle_poll
    }

    #[test]
    fn test_pool_node_is_empty() {
        let node: PoolNode<String> = PoolNode::new();
        assert!(node.is_empty(), "newly created node should be empty");

        node.insert(1, "v1".to_string());
        assert!(!node.is_empty(), "node with one item should not be empty");

        // get_any removes the item
        let item = node.get_any();
        assert!(item.is_some());
        assert!(node.is_empty(), "node should be empty after get_any");

        // insert then remove by id
        node.insert(2, "v2".to_string());
        assert!(!node.is_empty());

        let removed = node.remove(2);
        assert!(removed.is_some());
        assert!(node.is_empty(), "node should be empty after remove");
    }

    #[test]
    fn test_pool_node_is_empty_overflow_to_connections() {
        // Fill the hot queue (capacity = HOT_QUEUE_SIZE = 16), then overflow
        // into the connections HashMap, and verify is_empty drains both.
        let node: PoolNode<String> = PoolNode::new();

        for i in 0..(HOT_QUEUE_SIZE as i32 + 4) {
            node.insert(i, format!("v{i}"));
        }
        assert!(!node.is_empty());

        // Drain all items via get_any
        while node.get_any().is_some() {}
        assert!(node.is_empty(), "node should be empty after draining all");
    }

    #[tokio::test]
    async fn test_empty_node_removed_after_pop_closed() {
        // Reproducer from GitHub issue #748: a single connection is added and
        // then closed. The PoolNode entry in the pool HashMap must be removed.
        let meta = ConnectionMeta::new(101, 1);
        let cp: ConnectionPool<String> = ConnectionPool::new(2);
        cp.put(&meta, "v1".to_string());

        assert_eq!(cp.pool.read().len(), 1, "pool should have 1 node");

        cp.pop_closed(&meta);

        assert_eq!(
            cp.pool.read().len(),
            0,
            "empty PoolNode should be removed after pop_closed"
        );
    }

    #[tokio::test]
    async fn test_empty_node_removed_after_get() {
        // When the last connection is retrieved via get(), the PoolNode should
        // be cleaned up. This path is distinct from pop_closed because the
        // idle_poll/idle_timeout tasks exit via the watch_use channel and never
        // call pop_closed.
        let meta = ConnectionMeta::new(101, 1);
        let cp: ConnectionPool<String> = ConnectionPool::new(2);
        cp.put(&meta, "v1".to_string());

        assert_eq!(cp.pool.read().len(), 1);

        let conn = cp.get(&meta.key);
        assert!(conn.is_some());

        assert_eq!(
            cp.pool.read().len(),
            0,
            "empty PoolNode should be removed after get() takes the last connection"
        );
    }

    #[tokio::test]
    async fn test_empty_node_removed_when_get_finds_empty_node() {
        // If a node exists but has no connections (e.g. they were all evicted
        // by the LRU), get() should clean up the empty node.
        let meta1 = ConnectionMeta::new(101, 1);
        let meta2 = ConnectionMeta::new(101, 2);
        let cp: ConnectionPool<String> = ConnectionPool::new(4);
        cp.put(&meta1, "v1".to_string());
        cp.put(&meta2, "v2".to_string());

        // Remove both connections via pop_closed, but the first pop_closed
        // won't remove the node since meta2 is still there.
        cp.pop_closed(&meta1);
        assert_eq!(cp.pool.read().len(), 1, "node should still exist");

        cp.pop_closed(&meta2);
        assert_eq!(
            cp.pool.read().len(),
            0,
            "node should be removed after last connection is popped"
        );
    }

    #[tokio::test]
    async fn test_node_not_removed_when_connections_remain() {
        // Removing one connection from a node that has others must NOT remove
        // the node itself.
        let meta1 = ConnectionMeta::new(101, 1);
        let meta2 = ConnectionMeta::new(101, 2);
        let cp: ConnectionPool<String> = ConnectionPool::new(4);
        cp.put(&meta1, "v1".to_string());
        cp.put(&meta2, "v2".to_string());

        cp.pop_closed(&meta1);

        assert!(
            cp.pool.read().contains_key(&101),
            "node should still exist because meta2's connection is still in it"
        );
        assert_eq!(cp.pool.read().len(), 1);

        // The remaining connection should still be retrievable
        let conn = cp.get(&meta1.key);
        assert!(conn.is_some());
    }

    #[tokio::test]
    async fn test_empty_node_cleanup_only_affects_target_key() {
        // Cleaning up an empty node for one key must not affect other keys.
        let meta_a = ConnectionMeta::new(101, 1);
        let meta_b = ConnectionMeta::new(202, 2);
        let cp: ConnectionPool<String> = ConnectionPool::new(4);
        cp.put(&meta_a, "a".to_string());
        cp.put(&meta_b, "b".to_string());

        assert_eq!(cp.pool.read().len(), 2);

        // Remove all connections for key 101
        cp.pop_closed(&meta_a);

        assert_eq!(
            cp.pool.read().len(),
            1,
            "only key 101's empty node should be removed"
        );
        assert!(!cp.pool.read().contains_key(&101), "key 101 should be gone");
        assert!(cp.pool.read().contains_key(&202), "key 202 should remain");

        // key 202's connection should still be retrievable
        let conn = cp.get(&meta_b.key);
        assert_eq!(conn, Some("b".to_string()));
    }

    #[tokio::test]
    async fn test_empty_node_cleaned_after_lru_eviction() {
        // When LRU eviction removes the last connection for a key, the empty
        // node should be cleaned up by pop_evicted (called from put()).
        let meta1 = ConnectionMeta::new(101, 1);
        let meta2 = ConnectionMeta::new(202, 2);
        let cp: ConnectionPool<String> = ConnectionPool::new(1);

        cp.put(&meta1, "v1".to_string());
        assert_eq!(cp.pool.read().len(), 1);

        // This put evicts meta1 (LRU size = 1), making key 101's node empty.
        cp.put(&meta2, "v2".to_string());

        assert!(
            !cp.pool.read().contains_key(&101),
            "key 101's empty node should be removed after its only connection was evicted"
        );
        assert!(cp.pool.read().contains_key(&202));
    }

    #[tokio::test]
    async fn test_node_reusable_after_cleanup() {
        // After an empty node is cleaned up, inserting a new connection for the
        // same key should work correctly (a new PoolNode is created).
        let meta1 = ConnectionMeta::new(101, 1);
        let cp: ConnectionPool<String> = ConnectionPool::new(4);
        cp.put(&meta1, "first".to_string());

        cp.pop_closed(&meta1);
        assert_eq!(cp.pool.read().len(), 0, "node should be cleaned up");

        // Re-insert for the same key
        let meta2 = ConnectionMeta::new(101, 2);
        cp.put(&meta2, "second".to_string());

        assert_eq!(cp.pool.read().len(), 1);
        let conn = cp.get(&meta2.key);
        assert_eq!(conn, Some("second".to_string()));

        assert_eq!(
            cp.pool.read().len(),
            0,
            "node should be cleaned up again after get"
        );
    }
}


================================================
FILE: pingora-pool/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Generic connection pooling
//!
//! The pool is optimized for high concurrency, high RPS use cases. Each connection group has a
//! lock free hot pool to reduce the lock contention when some connections are reused and released
//! very frequently.

#![warn(clippy::all)]
#![allow(clippy::new_without_default)]
#![allow(clippy::type_complexity)]

mod connection;
mod lru;

pub use connection::{ConnectionMeta, ConnectionPool, PoolNode};


================================================
FILE: pingora-pool/src/lru.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use core::hash::Hash;
use lru::LruCache;
use parking_lot::RwLock;
use std::cell::RefCell;
use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
use std::sync::Arc;
use thread_local::ThreadLocal;
use tokio::sync::Notify;

pub struct Node<T> {
    pub close_notifier: Arc<Notify>,
    pub meta: T,
}

impl<T> Node<T> {
    pub fn new(meta: T) -> Self {
        Node {
            close_notifier: Arc::new(Notify::new()),
            meta,
        }
    }

    pub fn notify_close(&self) {
        self.close_notifier.notify_one();
    }
}

pub struct Lru<K, T>
where
    K: Send,
    T: Send,
{
    lru: RwLock<ThreadLocal<RefCell<LruCache<K, Node<T>>>>>,
    size: usize,
    drain: AtomicBool,
}

impl<K, T> Lru<K, T>
where
    K: Hash + Eq + Send,
    T: Send,
{
    pub fn new(size: usize) -> Self {
        Lru {
            lru: RwLock::new(ThreadLocal::new()),
            size,
            drain: AtomicBool::new(false),
        }
    }

    // put a node in and return the meta of the replaced node
    pub fn put(&self, key: K, value: Node<T>) -> Option<T> {
        if self.drain.load(Relaxed) {
            value.notify_close(); // sort of hack to simulate being evicted right away
            return None;
        }
        let lru = self.lru.read(); /* read lock */
        let lru_cache = &mut *(lru
            .get_or(|| RefCell::new(LruCache::unbounded()))
            .borrow_mut());
        lru_cache.put(key, value);
        if lru_cache.len() > self.size {
            match lru_cache.pop_lru() {
                Some((_, v)) => {
                    // TODO: drop the lock here?
                    v.notify_close();
                    return Some(v.meta);
                }
                None => return None,
            }
        }
        None
        /* read lock dropped */
    }

    pub fn add(&self, key: K, meta: T) -> (Arc<Notify>, Option<T>) {
        let node = Node::new(meta);
        let notifier = node.close_notifier.clone();
        // TODO: check if the key is already in it
        (notifier, self.put(key, node))
    }

    pub fn pop(&self, key: &K) -> Option<Node<T>> {
        let lru = self.lru.read(); /* read lock */
        let lru_cache = &mut *(lru
            .get_or(|| RefCell::new(LruCache::unbounded()))
            .borrow_mut());
        lru_cache.pop(key)
        /* read lock dropped */
    }

    #[allow(dead_code)]
    pub fn drain(&self) {
        self.drain.store(true, Relaxed);

        /* drain need to go through all the local lru cache objects
         * acquire an exclusive write lock to make it safe */
        let mut lru = self.lru.write(); /* write lock */
        let lru_cache_iter = lru.iter_mut();
        for lru_cache_rc in lru_cache_iter {
            let mut lru_cache = lru_cache_rc.borrow_mut();
            for (_, item) in lru_cache.iter() {
                item.notify_close();
            }
            lru_cache.clear();
        }
        /* write lock dropped */
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use log::debug;

    #[tokio::test]
    async fn test_evict_close() {
        let pool: Lru<i32, ()> = Lru::new(2);
        let (notifier1, _) = pool.add(1, ());
        let (notifier2, _) = pool.add(2, ());
        let (notifier3, _) = pool.add(3, ());
        let closed_item = tokio::select! {
            _ = notifier1.notified() => {debug!("notifier1"); 1},
            _ = notifier2.notified() => {debug!("notifier2"); 2},
            _ = notifier3.notified() => {debug!("notifier3"); 3},
        };
        assert_eq!(closed_item, 1);
    }

    #[tokio::test]
    async fn test_evict_close_with_pop() {
        let pool: Lru<i32, ()> = Lru::new(2);
        let (notifier1, _) = pool.add(1, ());
        let (notifier2, _) = pool.add(2, ());
        pool.pop(&1);
        let (notifier3, _) = pool.add(3, ());
        let (notifier4, _) = pool.add(4, ());
        let closed_item = tokio::select! {
            _ = notifier1.notified() => {debug!("notifier1"); 1},
            _ = notifier2.notified() => {debug!("notifier2"); 2},
            _ = notifier3.notified() => {debug!("notifier3"); 3},
            _ = notifier4.notified() => {debug!("notifier4"); 4},
        };
        assert_eq!(closed_item, 2);
    }

    #[tokio::test]
    async fn test_drain() {
        let pool: Lru<i32, ()> = Lru::new(4);
        let (notifier1, _) = pool.add(1, ());
        let (notifier2, _) = pool.add(2, ());
        let (notifier3, _) = pool.add(3, ());
        pool.drain();
        let (notifier4, _) = pool.add(4, ());

        tokio::join!(
            notifier1.notified(),
            notifier2.notified(),
            notifier3.notified(),
            notifier4.notified()
        );
    }
}


================================================
FILE: pingora-proxy/Cargo.toml
================================================
[package]
name = "pingora-proxy"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
rust-version = "1.84"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "http", "proxy", "pingora"]
exclude = ["tests/*"]
description = """
Pingora HTTP proxy APIs and traits.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_proxy"
path = "src/lib.rs"

[dependencies]
pingora-error = { version = "0.8.0", path = "../pingora-error" }
pingora-core = { version = "0.8.0", path = "../pingora-core", default-features = false }
pingora-cache = { version = "0.8.0", path = "../pingora-cache", default-features = false }
tokio = { workspace = true, features = ["macros", "net"] }
pingora-http = { version = "0.8.0", path = "../pingora-http" }
http = { workspace = true }
futures = "0.3"
bytes = { workspace = true }
async-trait = { workspace = true }
log = { workspace = true }
h2 = { workspace = true }
once_cell = { workspace = true }
clap = { version = "4", features = ["derive"] }
regex = "1"
rand = "0.8"

[dev-dependencies]
reqwest = { version = "0.11", features = [
    "gzip",
    "rustls-tls",
], default-features = false }
httparse = { workspace = true }
tokio-test = "0.4"
env_logger = "0.11"
hyper = "0.14"
tokio-tungstenite = "0.20.1"
pingora-limits = { version = "0.8.0", path = "../pingora-limits" }
pingora-load-balancing = { version = "0.8.0", path = "../pingora-load-balancing", default-features=false }
prometheus = "0"
futures-util = "0.3"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde_yaml = "0.9"

[target.'cfg(unix)'.dev-dependencies]
hyperlocal = "0.8"

[features]
default = []
openssl = ["pingora-core/openssl", "pingora-cache/openssl", "openssl_derived"]
boringssl = [
    "pingora-core/boringssl",
    "pingora-cache/boringssl",
    "openssl_derived",
]
rustls = ["pingora-core/rustls", "pingora-cache/rustls", "any_tls"]
s2n = ["pingora-core/s2n", "pingora-cache/s2n", "any_tls"]
openssl_derived = ["any_tls"]
any_tls = []
sentry = ["pingora-core/sentry"]
connection_filter = ["pingora-core/connection_filter"]

[[example]]
name = "connection_filter"
required-features = ["connection_filter"]

# or locally cargo doc --config "build.rustdocflags='--cfg doc_async_trait'"
[package.metadata.docs.rs]
rustdoc-args = ["--cfg", "doc_async_trait"]

[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(doc_async_trait)'] }


================================================
FILE: pingora-proxy/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-proxy/examples/backoff_retry.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::time::Duration;

use async_trait::async_trait;

use log::info;
use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_core::{prelude::Opt, Error};
use pingora_proxy::{ProxyHttp, Session};

/// This example shows how to setup retry-able errors with a backoff policy

#[derive(Default)]
struct RetryCtx {
    pub retries: u32,
}

struct BackoffRetryProxy;

#[async_trait]
impl ProxyHttp for BackoffRetryProxy {
    type CTX = RetryCtx;
    fn new_ctx(&self) -> Self::CTX {
        Self::CTX::default()
    }

    fn fail_to_connect(
        &self,
        _session: &mut Session,
        _peer: &HttpPeer,
        ctx: &mut Self::CTX,
        e: Box<Error>,
    ) -> Box<Error> {
        ctx.retries += 1;
        let mut retry_e = e;
        retry_e.set_retry(true);
        retry_e
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        const MAX_SLEEP: Duration = Duration::from_secs(10);

        if ctx.retries > 0 {
            // simple example of exponential backoff with a max of 10s
            let sleep_ms =
                std::cmp::min(Duration::from_millis(u64::pow(10, ctx.retries)), MAX_SLEEP);
            info!("sleeping for ms: {sleep_ms:?}");
            tokio::time::sleep(sleep_ms).await;
        }
        let mut peer = HttpPeer::new(("10.0.0.1", 80), false, "".into());
        peer.options.connection_timeout = Some(Duration::from_millis(100));
        Ok(Box::new(peer))
    }
}

// RUST_LOG=INFO cargo run --example backoff_retry -- --conf examples/conf.yaml

fn main() {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse_args();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    let mut my_proxy =
        pingora_proxy::http_proxy_service(&my_server.configuration, BackoffRetryProxy);
    my_proxy.add_tcp("0.0.0.0:6195");

    my_server.add_service(my_proxy);
    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/conf.yaml
================================================
---
version: 1
threads: 2
pid_file: /tmp/load_balancer.pid
error_log: /tmp/load_balancer_err.log
upgrade_sock: /tmp/load_balancer.sock
max_retries: 5


================================================
FILE: pingora-proxy/examples/connection_filter.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use clap::Parser;
use log::info;
use pingora_core::listeners::ConnectionFilter;
use pingora_core::prelude::Opt;
use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_proxy::{ProxyHttp, Session};
use std::sync::Arc;

/// This example demonstrates how to implement a connection filter
pub struct MyProxy;

#[async_trait]
impl ProxyHttp for MyProxy {
    type CTX = ();

    fn new_ctx(&self) -> Self::CTX {}

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        // Forward to httpbin.org for testing
        let peer = HttpPeer::new(("httpbin.org", 80), false, "httpbin.org".into());
        Ok(Box::new(peer))
    }
}

/// Connection filter that blocks ALL connections (for testing)
#[derive(Debug, Clone)]
struct BlockAllFilter;

#[async_trait]
impl ConnectionFilter for BlockAllFilter {
    async fn should_accept(&self, addr: &std::net::SocketAddr) -> bool {
        info!("BLOCKING connection from {} (BlockAllFilter active)", addr);
        false
    }
}

// RUST_LOG=INFO cargo run --example connection_filter

fn main() {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    let mut my_proxy = pingora_proxy::http_proxy_service(&my_server.configuration, MyProxy);

    // Create a filter that blocks ALL connections
    let filter = Arc::new(BlockAllFilter);

    info!("Setting BlockAllFilter on proxy service");
    my_proxy.set_connection_filter(filter.clone());

    info!("Adding TCP endpoints AFTER setting filter");
    my_proxy.add_tcp("0.0.0.0:6195");
    my_proxy.add_tcp("0.0.0.0:6196");

    info!("====================================");
    info!("Server starting with BlockAllFilter");
    info!("This filter blocks ALL connections!");
    info!("====================================");
    info!("");
    info!("Test with:");
    info!("  curl http://localhost:6195/get");
    info!("  curl http://localhost:6196/get");
    info!("");
    info!("ALL requests should be blocked!");
    info!("You should see 'BLOCKING connection' in the logs");
    info!("and curl should fail with 'Connection refused' or hang");
    info!("");

    my_server.add_service(my_proxy);
    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/ctx.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use log::info;
use std::sync::Mutex;

use pingora_core::server::configuration::Opt;
use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_proxy::{ProxyHttp, Session};

// global counter
static REQ_COUNTER: Mutex<usize> = Mutex::new(0);

pub struct MyProxy {
    // counter for the service
    beta_counter: Mutex<usize>, // AtomicUsize works too
}

pub struct MyCtx {
    beta_user: bool,
}

fn check_beta_user(req: &pingora_http::RequestHeader) -> bool {
    // some simple logic to check if user is beta
    req.headers.get("beta-flag").is_some()
}

#[async_trait]
impl ProxyHttp for MyProxy {
    type CTX = MyCtx;
    fn new_ctx(&self) -> Self::CTX {
        MyCtx { beta_user: false }
    }

    async fn request_filter(&self, session: &mut Session, ctx: &mut Self::CTX) -> Result<bool> {
        ctx.beta_user = check_beta_user(session.req_header());
        Ok(false)
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let mut req_counter = REQ_COUNTER.lock().unwrap();
        *req_counter += 1;

        let addr = if ctx.beta_user {
            let mut beta_count = self.beta_counter.lock().unwrap();
            *beta_count += 1;
            info!("I'm a beta user #{beta_count}");
            ("1.0.0.1", 443)
        } else {
            info!("I'm an user #{req_counter}");
            ("1.1.1.1", 443)
        };

        let peer = Box::new(HttpPeer::new(addr, true, "one.one.one.one".to_string()));
        Ok(peer)
    }
}

// RUST_LOG=INFO cargo run --example ctx
// curl 127.0.0.1:6190 -H "Host: one.one.one.one"
// curl 127.0.0.1:6190 -H "Host: one.one.one.one" -H "beta-flag: 1"
fn main() {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse_args();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    let mut my_proxy = pingora_proxy::http_proxy_service(
        &my_server.configuration,
        MyProxy {
            beta_counter: Mutex::new(0),
        },
    );
    my_proxy.add_tcp("0.0.0.0:6190");

    my_server.add_service(my_proxy);
    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/gateway.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use bytes::Bytes;
use log::info;
use prometheus::register_int_counter;

use pingora_core::server::configuration::Opt;
use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_http::ResponseHeader;
use pingora_proxy::{ProxyHttp, Session};

fn check_login(req: &pingora_http::RequestHeader) -> bool {
    // implement you logic check logic here
    req.headers.get("Authorization").map(|v| v.as_bytes()) == Some(b"password")
}

pub struct MyGateway {
    req_metric: prometheus::IntCounter,
}

#[async_trait]
impl ProxyHttp for MyGateway {
    type CTX = ();
    fn new_ctx(&self) -> Self::CTX {}

    async fn request_filter(&self, session: &mut Session, _ctx: &mut Self::CTX) -> Result<bool> {
        if session.req_header().uri.path().starts_with("/login")
            && !check_login(session.req_header())
        {
            let _ = session
                .respond_error_with_body(403, Bytes::from_static(b"no way!"))
                .await;
            // true: early return as the response is already written
            return Ok(true);
        }
        Ok(false)
    }

    async fn upstream_peer(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let addr = if session.req_header().uri.path().starts_with("/family") {
            ("1.0.0.1", 443)
        } else {
            ("1.1.1.1", 443)
        };

        info!("connecting to {addr:?}");

        let peer = Box::new(HttpPeer::new(addr, true, "one.one.one.one".to_string()));
        Ok(peer)
    }

    async fn response_filter(
        &self,
        _session: &mut Session,
        upstream_response: &mut ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        // replace existing header if any
        upstream_response
            .insert_header("Server", "MyGateway")
            .unwrap();
        // because we don't support h3
        upstream_response.remove_header("alt-svc");

        Ok(())
    }

    async fn logging(
        &self,
        session: &mut Session,
        _e: Option<&pingora_core::Error>,
        ctx: &mut Self::CTX,
    ) {
        let response_code = session
            .response_written()
            .map_or(0, |resp| resp.status.as_u16());
        info!(
            "{} response code: {response_code}",
            self.request_summary(session, ctx)
        );

        self.req_metric.inc();
    }
}

// RUST_LOG=INFO cargo run --example gateway
// curl 127.0.0.1:6191 -H "Host: one.one.one.one"
// curl 127.0.0.1:6190/family/ -H "Host: one.one.one.one"
// curl 127.0.0.1:6191/login/ -H "Host: one.one.one.one" -I -H "Authorization: password"
// curl 127.0.0.1:6191/login/ -H "Host: one.one.one.one" -I -H "Authorization: bad"
// For metrics
// curl 127.0.0.1:6192/
fn main() {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse_args();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    let mut my_proxy = pingora_proxy::http_proxy_service(
        &my_server.configuration,
        MyGateway {
            req_metric: register_int_counter!("req_counter", "Number of requests").unwrap(),
        },
    );
    my_proxy.add_tcp("0.0.0.0:6191");
    my_server.add_service(my_proxy);

    let mut prometheus_service_http =
        pingora_core::services::listening::Service::prometheus_http_service();
    prometheus_service_http.add_tcp("127.0.0.1:6192");
    my_server.add_service(prometheus_service_http);

    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/grpc_web_module.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;

use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_core::{
    modules::http::{
        grpc_web::{GrpcWeb, GrpcWebBridge},
        HttpModules,
    },
    prelude::Opt,
};
use pingora_proxy::{ProxyHttp, Session};

// This example shows how to use the gRPC-web bridge module

pub struct GrpcWebBridgeProxy;

#[async_trait]
impl ProxyHttp for GrpcWebBridgeProxy {
    type CTX = ();
    fn new_ctx(&self) -> Self::CTX {}

    fn init_downstream_modules(&self, modules: &mut HttpModules) {
        // Add the gRPC web module
        modules.add_module(Box::new(GrpcWeb))
    }

    async fn early_request_filter(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        let grpc = session
            .downstream_modules_ctx
            .get_mut::<GrpcWebBridge>()
            .expect("GrpcWebBridge module added");

        // initialize gRPC module for this request
        grpc.init();
        Ok(())
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        // this needs to be your gRPC server
        let grpc_peer = Box::new(HttpPeer::new(
            ("1.1.1.1", 443),
            true,
            "one.one.one.one".to_string(),
        ));
        Ok(grpc_peer)
    }
}

// RUST_LOG=INFO cargo run --example grpc_web_module

fn main() {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse_args();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    let mut my_proxy =
        pingora_proxy::http_proxy_service(&my_server.configuration, GrpcWebBridgeProxy);
    my_proxy.add_tcp("0.0.0.0:6194");

    my_server.add_service(my_proxy);
    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/load_balancer.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use log::info;
use pingora_core::services::background::background_service;
use std::{sync::Arc, time::Duration};

use pingora_core::server::configuration::Opt;
use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_load_balancing::{health_check, selection::RoundRobin, LoadBalancer};
use pingora_proxy::{ProxyHttp, Session};

pub struct LB(Arc<LoadBalancer<RoundRobin>>);

#[async_trait]
impl ProxyHttp for LB {
    type CTX = ();
    fn new_ctx(&self) -> Self::CTX {}

    async fn upstream_peer(&self, _session: &mut Session, _ctx: &mut ()) -> Result<Box<HttpPeer>> {
        let upstream = self
            .0
            .select(b"", 256) // hash doesn't matter
            .unwrap();

        info!("upstream peer is: {:?}", upstream);

        let peer = Box::new(HttpPeer::new(upstream, true, "one.one.one.one".to_string()));
        Ok(peer)
    }

    async fn upstream_request_filter(
        &self,
        _session: &mut Session,
        upstream_request: &mut pingora_http::RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        upstream_request
            .insert_header("Host", "one.one.one.one")
            .unwrap();
        Ok(())
    }
}

// RUST_LOG=INFO cargo run --example load_balancer
fn main() {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse_args();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    // 127.0.0.1:343" is just a bad server
    let mut upstreams =
        LoadBalancer::try_from_iter(["1.1.1.1:443", "1.0.0.1:443", "127.0.0.1:343"]).unwrap();

    // We add health check in the background so that the bad server is never selected.
    let hc = health_check::TcpHealthCheck::new();
    upstreams.set_health_check(hc);
    upstreams.health_check_frequency = Some(Duration::from_secs(1));

    let background = background_service("health check", upstreams);

    let upstreams = background.task();

    let mut lb = pingora_proxy::http_proxy_service(&my_server.configuration, LB(upstreams));
    lb.add_tcp("0.0.0.0:6188");

    let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
    let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));

    let mut tls_settings =
        pingora_core::listeners::tls::TlsSettings::intermediate(&cert_path, &key_path).unwrap();
    tls_settings.enable_h2();
    lb.add_tls_with_settings("0.0.0.0:6189", None, tls_settings);

    my_server.add_service(lb);
    my_server.add_service(background);
    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/modify_response.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use bytes::Bytes;
use serde::{Deserialize, Serialize};
use std::net::ToSocketAddrs;

use pingora_core::server::configuration::Opt;
use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_http::ResponseHeader;
use pingora_proxy::{ProxyHttp, Session};

const HOST: &str = "ip.jsontest.com";

#[derive(Serialize, Deserialize)]
pub struct Resp {
    ip: String,
}

pub struct Json2Yaml {
    addr: std::net::SocketAddr,
}

pub struct MyCtx {
    buffer: Vec<u8>,
}

#[async_trait]
impl ProxyHttp for Json2Yaml {
    type CTX = MyCtx;
    fn new_ctx(&self) -> Self::CTX {
        MyCtx { buffer: vec![] }
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let peer = Box::new(HttpPeer::new(self.addr, false, HOST.to_owned()));
        Ok(peer)
    }

    async fn upstream_request_filter(
        &self,
        _session: &mut Session,
        upstream_request: &mut pingora_http::RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        upstream_request
            .insert_header("Host", HOST.to_owned())
            .unwrap();
        Ok(())
    }

    async fn response_filter(
        &self,
        _session: &mut Session,
        upstream_response: &mut ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        // Remove content-length because the size of the new body is unknown
        upstream_response.remove_header("Content-Length");
        upstream_response
            .insert_header("Transfer-Encoding", "Chunked")
            .unwrap();
        Ok(())
    }

    fn response_body_filter(
        &self,
        _session: &mut Session,
        body: &mut Option<Bytes>,
        end_of_stream: bool,
        ctx: &mut Self::CTX,
    ) -> Result<Option<std::time::Duration>>
    where
        Self::CTX: Send + Sync,
    {
        // buffer the data
        if let Some(b) = body {
            ctx.buffer.extend(&b[..]);
            // drop the body
            b.clear();
        }
        if end_of_stream {
            // This is the last chunk, we can process the data now
            let json_body: Resp = serde_json::de::from_slice(&ctx.buffer).unwrap();
            let yaml_body = serde_yaml::to_string(&json_body).unwrap();
            *body = Some(Bytes::copy_from_slice(yaml_body.as_bytes()));
        }

        Ok(None)
    }
}

// RUST_LOG=INFO cargo run --example modify_response
// curl 127.0.0.1:6191
fn main() {
    env_logger::init();

    let opt = Opt::parse_args();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    let mut my_proxy = pingora_proxy::http_proxy_service(
        &my_server.configuration,
        Json2Yaml {
            // hardcode the IP of ip.jsontest.com for now
            addr: ("142.251.2.121", 80)
                .to_socket_addrs()
                .unwrap()
                .next()
                .unwrap(),
        },
    );

    my_proxy.add_tcp("127.0.0.1:6191");

    my_server.add_service(my_proxy);
    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/multi_lb.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;
use std::sync::Arc;

use pingora_core::{prelude::*, services::background::GenBackgroundService};
use pingora_load_balancing::{
    health_check::TcpHealthCheck,
    selection::{BackendIter, BackendSelection, RoundRobin},
    LoadBalancer,
};
use pingora_proxy::{http_proxy_service, ProxyHttp, Session};

struct Router {
    cluster_one: Arc<LoadBalancer<RoundRobin>>,
    cluster_two: Arc<LoadBalancer<RoundRobin>>,
}

#[async_trait]
impl ProxyHttp for Router {
    type CTX = ();
    fn new_ctx(&self) {}

    async fn upstream_peer(&self, session: &mut Session, _ctx: &mut ()) -> Result<Box<HttpPeer>> {
        // determine LB cluster based on request uri
        let cluster = if session.req_header().uri.path().starts_with("/one/") {
            &self.cluster_one
        } else {
            &self.cluster_two
        };

        let upstream = cluster
            .select(b"", 256) // hash doesn't matter for round robin
            .unwrap();

        println!("upstream peer is: {upstream:?}");

        // Set SNI to one.one.one.one
        let peer = Box::new(HttpPeer::new(upstream, true, "one.one.one.one".to_string()));
        Ok(peer)
    }
}

fn build_cluster_service<S>(upstreams: &[&str]) -> GenBackgroundService<LoadBalancer<S>>
where
    S: BackendSelection + 'static,
    S::Iter: BackendIter,
{
    let mut cluster = LoadBalancer::try_from_iter(upstreams).unwrap();
    cluster.set_health_check(TcpHealthCheck::new());
    cluster.health_check_frequency = Some(std::time::Duration::from_secs(1));

    background_service("cluster health check", cluster)
}

// RUST_LOG=INFO cargo run --example multi_lb
// curl 127.0.0.1:6188/one/
// curl 127.0.0.1:6188/two/
fn main() {
    let mut my_server = Server::new(None).unwrap();
    my_server.bootstrap();

    // build multiple clusters
    let cluster_one = build_cluster_service::<RoundRobin>(&["1.1.1.1:443", "127.0.0.1:343"]);
    let cluster_two = build_cluster_service::<RoundRobin>(&["1.0.0.1:443", "127.0.0.2:343"]);

    let router = Router {
        cluster_one: cluster_one.task(),
        cluster_two: cluster_two.task(),
    };
    let mut router_service = http_proxy_service(&my_server.configuration, router);
    router_service.add_tcp("0.0.0.0:6188");

    my_server.add_service(router_service);
    my_server.add_service(cluster_one);
    my_server.add_service(cluster_two);

    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/rate_limiter.rs
================================================
use async_trait::async_trait;
use once_cell::sync::Lazy;
use pingora_core::prelude::*;
use pingora_http::{RequestHeader, ResponseHeader};
use pingora_limits::rate::Rate;
use pingora_load_balancing::prelude::{RoundRobin, TcpHealthCheck};
use pingora_load_balancing::LoadBalancer;
use pingora_proxy::{http_proxy_service, ProxyHttp, Session};
use std::sync::Arc;
use std::time::Duration;

fn main() {
    let mut server = Server::new(Some(Opt::default())).unwrap();
    server.bootstrap();
    let mut upstreams = LoadBalancer::try_from_iter(["1.1.1.1:443", "1.0.0.1:443"]).unwrap();
    // Set health check
    let hc = TcpHealthCheck::new();
    upstreams.set_health_check(hc);
    upstreams.health_check_frequency = Some(Duration::from_secs(1));
    // Set background service
    let background = background_service("health check", upstreams);
    let upstreams = background.task();
    // Set load balancer
    let mut lb = http_proxy_service(&server.configuration, LB(upstreams));
    lb.add_tcp("0.0.0.0:6188");

    // let rate = Rate
    server.add_service(background);
    server.add_service(lb);
    server.run_forever();
}

pub struct LB(Arc<LoadBalancer<RoundRobin>>);

impl LB {
    pub fn get_request_appid(&self, session: &mut Session) -> Option<String> {
        match session
            .req_header()
            .headers
            .get("appid")
            .map(|v| v.to_str())
        {
            None => None,
            Some(v) => match v {
                Ok(v) => Some(v.to_string()),
                Err(_) => None,
            },
        }
    }
}

// Rate limiter
static RATE_LIMITER: Lazy<Rate> = Lazy::new(|| Rate::new(Duration::from_secs(1)));

// max request per second per client
static MAX_REQ_PER_SEC: isize = 1;

#[async_trait]
impl ProxyHttp for LB {
    type CTX = ();

    fn new_ctx(&self) {}

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let upstream = self.0.select(b"", 256).unwrap();
        // Set SNI
        let peer = Box::new(HttpPeer::new(upstream, true, "one.one.one.one".to_string()));
        Ok(peer)
    }

    async fn upstream_request_filter(
        &self,
        _session: &mut Session,
        upstream_request: &mut RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        upstream_request
            .insert_header("Host", "one.one.one.one")
            .unwrap();
        Ok(())
    }

    async fn request_filter(&self, session: &mut Session, _ctx: &mut Self::CTX) -> Result<bool>
    where
        Self::CTX: Send + Sync,
    {
        let appid = match self.get_request_appid(session) {
            None => return Ok(false), // no client appid found, skip rate limiting
            Some(addr) => addr,
        };

        // retrieve the current window requests
        let curr_window_requests = RATE_LIMITER.observe(&appid, 1);
        if curr_window_requests > MAX_REQ_PER_SEC {
            // rate limited, return 429
            let mut header = ResponseHeader::build(429, None).unwrap();
            header
                .insert_header("X-Rate-Limit-Limit", MAX_REQ_PER_SEC.to_string())
                .unwrap();
            header.insert_header("X-Rate-Limit-Remaining", "0").unwrap();
            header.insert_header("X-Rate-Limit-Reset", "1").unwrap();
            session.set_keepalive(None);
            session
                .write_response_header(Box::new(header), true)
                .await?;
            return Ok(true);
        }
        Ok(false)
    }
}


================================================
FILE: pingora-proxy/examples/use_module.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use async_trait::async_trait;

use pingora_core::modules::http::HttpModules;
use pingora_core::server::configuration::Opt;
use pingora_core::server::Server;
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::Result;
use pingora_http::RequestHeader;
use pingora_proxy::{ProxyHttp, Session};

// This example shows how to build and import 3rd party modules

/// A simple ACL to check "Authorization: basic $credential" header
mod my_acl {
    use super::*;
    use pingora_core::modules::http::{HttpModule, HttpModuleBuilder, Module};
    use pingora_error::{Error, ErrorType::HTTPStatus};
    use std::any::Any;

    // This is the struct for per request module context
    struct MyAclCtx {
        credential_header: String,
    }

    // Implement how the module would consume and/or modify request and/or response
    #[async_trait]
    impl HttpModule for MyAclCtx {
        async fn request_header_filter(&mut self, req: &mut RequestHeader) -> Result<()> {
            let Some(auth) = req.headers.get(http::header::AUTHORIZATION) else {
                return Error::e_explain(HTTPStatus(403), "Auth failed, no auth header");
            };

            if auth.as_bytes() != self.credential_header.as_bytes() {
                Error::e_explain(HTTPStatus(403), "Auth failed, credential mismatch")
            } else {
                Ok(())
            }
        }

        // boilerplate code for all modules
        fn as_any(&self) -> &dyn Any {
            self
        }
        fn as_any_mut(&mut self) -> &mut dyn Any {
            self
        }
    }

    // This is the singleton object which will be attached to the server
    pub struct MyAcl {
        pub credential: String,
    }
    impl HttpModuleBuilder for MyAcl {
        // This function defines how to create each Ctx. This function is called when a new request
        // arrives
        fn init(&self) -> Module {
            Box::new(MyAclCtx {
                // Make it easier to compare header
                // We could also store this value in MyAcl and use Arc to share it with every Ctx.
                credential_header: format!("basic {}", self.credential),
            })
        }
    }
}

pub struct MyProxy;

#[async_trait]
impl ProxyHttp for MyProxy {
    type CTX = ();
    fn new_ctx(&self) -> Self::CTX {}

    // This function is only called once when the server starts
    fn init_downstream_modules(&self, modules: &mut HttpModules) {
        // Add the module to MyProxy
        modules.add_module(Box::new(my_acl::MyAcl {
            credential: "testcode".into(),
        }))
    }

    async fn upstream_peer(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let peer = Box::new(HttpPeer::new(
            ("1.1.1.1", 443),
            true,
            "one.one.one.one".to_string(),
        ));
        Ok(peer)
    }
}

// RUST_LOG=INFO cargo run --example use_module
// curl 127.0.0.1:6193 -H "Host: one.one.one.one" -v
// curl 127.0.0.1:6193 -H "Host: one.one.one.one" -H "Authorization: basic testcode"
// curl 127.0.0.1:6193 -H "Host: one.one.one.one" -H "Authorization: basic wrong" -v
fn main() {
    env_logger::init();

    // read command line arguments
    let opt = Opt::parse_args();
    let mut my_server = Server::new(Some(opt)).unwrap();
    my_server.bootstrap();

    let mut my_proxy = pingora_proxy::http_proxy_service(&my_server.configuration, MyProxy);
    my_proxy.add_tcp("0.0.0.0:6193");

    my_server.add_service(my_proxy);
    my_server.run_forever();
}


================================================
FILE: pingora-proxy/examples/virtual_l4.rs
================================================
//! This example demonstrates to how to implement a custom L4 connector
//! together with a virtual socket.

use std::net::{IpAddr, Ipv4Addr, SocketAddr};
use std::sync::Arc;

use async_trait::async_trait;
use pingora_core::connectors::L4Connect;
use pingora_core::prelude::HttpPeer;
use pingora_core::protocols::l4::socket::SocketAddr as L4SocketAddr;
use pingora_core::protocols::l4::stream::Stream;
use pingora_core::protocols::l4::virt::{VirtualSocket, VirtualSocketStream};
use pingora_core::server::RunArgs;
use pingora_core::server::{configuration::ServerConf, Server};
use pingora_core::services::listening::Service;
use pingora_core::upstreams::peer::PeerOptions;
use pingora_error::Result;
use pingora_proxy::{http_proxy_service_with_name, prelude::*, HttpProxy, ProxyHttp};
use tokio::io::{AsyncRead, AsyncWrite};

/// Static virtual socket that serves a single HTTP request with a static response.
///
/// In real world use cases you would implement [`VirtualSocket`] for streams
/// that implement `AsyncRead + AsyncWrite`.
#[derive(Debug)]
struct StaticVirtualSocket {
    content: Vec<u8>,
    read_pos: usize,
}

impl StaticVirtualSocket {
    fn new() -> Self {
        let response = b"HTTP/1.1 200 OK\r\nContent-Length: 13\r\n\r\nHello, world!";
        Self {
            content: response.to_vec(),
            read_pos: 0,
        }
    }
}

impl AsyncRead for StaticVirtualSocket {
    fn poll_read(
        mut self: std::pin::Pin<&mut Self>,
        _cx: &mut std::task::Context<'_>,
        buf: &mut tokio::io::ReadBuf<'_>,
    ) -> std::task::Poll<std::io::Result<()>> {
        debug_assert!(self.read_pos <= self.content.len());

        let remaining = self.content.len() - self.read_pos;
        if remaining == 0 {
            return std::task::Poll::Ready(Ok(()));
        }

        let to_read = std::cmp::min(remaining, buf.remaining());
        buf.put_slice(&self.content[self.read_pos..self.read_pos + to_read]);
        self.read_pos += to_read;

        std::task::Poll::Ready(Ok(()))
    }
}

impl AsyncWrite for StaticVirtualSocket {
    fn poll_write(
        self: std::pin::Pin<&mut Self>,
        _cx: &mut std::task::Context<'_>,
        buf: &[u8],
    ) -> std::task::Poll<std::io::Result<usize>> {
        // Discard all writes
        std::task::Poll::Ready(Ok(buf.len()))
    }

    fn poll_flush(
        self: std::pin::Pin<&mut Self>,
        _cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<std::io::Result<()>> {
        std::task::Poll::Ready(Ok(()))
    }

    fn poll_shutdown(
        self: std::pin::Pin<&mut Self>,
        _cx: &mut std::task::Context<'_>,
    ) -> std::task::Poll<std::io::Result<()>> {
        std::task::Poll::Ready(Ok(()))
    }
}

impl VirtualSocket for StaticVirtualSocket {
    fn set_socket_option(
        &self,
        _opt: pingora_core::protocols::l4::virt::VirtualSockOpt,
    ) -> std::io::Result<()> {
        Ok(())
    }
}

#[derive(Debug)]
struct VirtualConnector;

#[async_trait]
impl L4Connect for VirtualConnector {
    async fn connect(&self, _addr: &L4SocketAddr) -> pingora_error::Result<Stream> {
        Ok(Stream::from(VirtualSocketStream::new(Box::new(
            StaticVirtualSocket::new(),
        ))))
    }
}

struct VirtualProxy {
    connector: Arc<dyn L4Connect + Send + Sync>,
}

impl VirtualProxy {
    fn new() -> Self {
        Self {
            connector: Arc::new(VirtualConnector),
        }
    }
}

#[async_trait::async_trait]
impl ProxyHttp for VirtualProxy {
    type CTX = ();

    fn new_ctx(&self) -> Self::CTX {}

    // Route everything to example.org unless the Host header is "virtual.test",
    // in which case target the special virtual address 203.0.113.1:18080.
    async fn upstream_peer(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<pingora_core::upstreams::peer::HttpPeer>> {
        let mut options = PeerOptions::new();
        options.custom_l4 = Some(self.connector.clone());

        Ok(Box::new(HttpPeer {
            _address: L4SocketAddr::Inet(SocketAddr::new(
                IpAddr::V4(Ipv4Addr::new(1, 1, 1, 1)),
                80,
            )),
            scheme: pingora_core::upstreams::peer::Scheme::HTTP,
            sni: "example.org".to_string(),
            proxy: None,
            client_cert_key: None,
            group_key: 0,
            options,
        }))
    }
}

fn main() {
    // Minimal server config
    let conf = Arc::new(ServerConf::default());

    // Build the service and set the default L4 connector
    let mut svc: Service<HttpProxy<VirtualProxy>> =
        http_proxy_service_with_name(&conf, VirtualProxy::new(), "virtual-proxy");

    // Listen
    let addr = "127.0.0.1:6196";
    svc.add_tcp(addr);

    let mut server = Server::new(None).unwrap();
    server.add_service(svc);
    let run = RunArgs::default();

    eprintln!("Listening on {addr}, try: curl http://{addr}/");
    server.run(run);
}


================================================
FILE: pingora-proxy/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! # pingora-proxy
//!
//! Programmable HTTP proxy built on top of [pingora_core].
//!
//! # Features
//! - HTTP/1.x and HTTP/2 for both downstream and upstream
//! - Connection pooling
//! - TLSv1.3, mutual TLS, customizable CA
//! - Request/Response scanning, modification or rejection
//! - Dynamic upstream selection
//! - Configurable retry and failover
//! - Fully programmable and customizable at any stage of a HTTP request
//!
//! # How to use
//!
//! Users of this crate defines their proxy by implementing [ProxyHttp] trait, which contains the
//! callbacks to be invoked at each stage of a HTTP request.
//!
//! Then the service can be passed into [`http_proxy_service()`] for a [pingora_core::server::Server] to
//! run it.
//!
//! See `examples/load_balancer.rs` for a detailed example.

use async_trait::async_trait;
use bytes::Bytes;
use futures::future::BoxFuture;
use futures::future::FutureExt;
use http::{header, version::Version, Method};
use log::{debug, error, trace, warn};
use once_cell::sync::Lazy;
use pingora_http::{RequestHeader, ResponseHeader};
use std::fmt::Debug;
use std::str;
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};
use std::time::Duration;
use tokio::sync::{mpsc, Notify};
use tokio::time;

use pingora_cache::NoCacheReason;
use pingora_core::apps::{
    HttpPersistentSettings, HttpServerApp, HttpServerOptions, ReusedHttpStream,
};
use pingora_core::connectors::http::custom;
use pingora_core::connectors::{http::Connector, ConnectorOptions};
use pingora_core::modules::http::compression::ResponseCompressionBuilder;
use pingora_core::modules::http::{HttpModuleCtx, HttpModules};
use pingora_core::protocols::http::client::HttpSession as ClientSession;
use pingora_core::protocols::http::custom::CustomMessageWrite;
use pingora_core::protocols::http::subrequest::server::SubrequestHandle;
use pingora_core::protocols::http::v1::client::HttpSession as HttpSessionV1;
use pingora_core::protocols::http::v2::server::H2Options;
use pingora_core::protocols::http::HttpTask;
use pingora_core::protocols::http::ServerSession as HttpSession;
use pingora_core::protocols::http::SERVER_NAME;
use pingora_core::protocols::Stream;
use pingora_core::protocols::{Digest, UniqueID};
use pingora_core::server::configuration::ServerConf;
use pingora_core::server::ShutdownWatch;
use pingora_core::upstreams::peer::{HttpPeer, Peer};
use pingora_error::{Error, ErrorSource, ErrorType::*, OrErr, Result};

const TASK_BUFFER_SIZE: usize = 4;

mod proxy_cache;
mod proxy_common;
mod proxy_custom;
mod proxy_h1;
mod proxy_h2;
mod proxy_purge;
mod proxy_trait;
pub mod subrequest;

use subrequest::{BodyMode, Ctx as SubrequestCtx};

pub use proxy_cache::range_filter::{range_header_filter, MultiRangeInfo, RangeType};
pub use proxy_purge::PurgeStatus;
pub use proxy_trait::{FailToProxy, ProxyHttp};

pub mod prelude {
    pub use crate::{http_proxy, http_proxy_service, ProxyHttp, Session};
}

pub type ProcessCustomSession<SV, C> = Arc<
    dyn Fn(Arc<HttpProxy<SV, C>>, Stream, &ShutdownWatch) -> BoxFuture<'static, Option<Stream>>
        + Send
        + Sync
        + Unpin
        + 'static,
>;

/// The concrete type that holds the user defined HTTP proxy.
///
/// Users don't need to interact with this object directly.
pub struct HttpProxy<SV, C = ()>
where
    C: custom::Connector, // Upstream custom connector
{
    inner: SV, // TODO: name it better than inner
    client_upstream: Connector<C>,
    shutdown: Notify,
    shutdown_flag: Arc<AtomicBool>,
    pub server_options: Option<HttpServerOptions>,
    pub h2_options: Option<H2Options>,
    pub downstream_modules: HttpModules,
    max_retries: usize,
    process_custom_session: Option<ProcessCustomSession<SV, C>>,
}

impl<SV> HttpProxy<SV, ()> {
    /// Create a new [`HttpProxy`] with the given [`ProxyHttp`] implementation and [`ServerConf`].
    ///
    /// After creating an `HttpProxy`, you should call [`HttpProxy::handle_init_modules()`] to
    /// initialize the downstream modules before processing requests.
    ///
    /// For most use cases, prefer using [`http_proxy_service()`] which wraps the `HttpProxy` in a
    /// [`Service`]. This constructor is useful when you need to integrate `HttpProxy` into a custom
    /// accept loop (e.g., for SNI-based routing decisions before TLS termination).
    ///
    /// # Example
    ///
    /// ```ignore
    /// use pingora_proxy::HttpProxy;
    /// use std::sync::Arc;
    ///
    /// let mut proxy = HttpProxy::new(my_proxy_app, server_conf);
    /// proxy.handle_init_modules();
    /// let proxy = Arc::new(proxy);
    /// // Use proxy.process_new_http() in your custom accept loop
    /// ```
    pub fn new(inner: SV, conf: Arc<ServerConf>) -> Self {
        HttpProxy {
            inner,
            client_upstream: Connector::new(Some(ConnectorOptions::from_server_conf(&conf))),
            shutdown: Notify::new(),
            shutdown_flag: Arc::new(AtomicBool::new(false)),
            server_options: None,
            h2_options: None,
            downstream_modules: HttpModules::new(),
            max_retries: conf.max_retries,
            process_custom_session: None,
        }
    }
}

impl<SV, C> HttpProxy<SV, C>
where
    C: custom::Connector,
{
    fn new_custom(
        inner: SV,
        conf: Arc<ServerConf>,
        connector: C,
        on_custom: Option<ProcessCustomSession<SV, C>>,
        server_options: Option<HttpServerOptions>,
    ) -> Self
    where
        SV: ProxyHttp + Send + Sync + 'static,
        SV::CTX: Send + Sync,
    {
        let client_upstream =
            Connector::new_custom(Some(ConnectorOptions::from_server_conf(&conf)), connector);

        HttpProxy {
            inner,
            client_upstream,
            shutdown: Notify::new(),
            shutdown_flag: Arc::new(AtomicBool::new(false)),
            server_options,
            downstream_modules: HttpModules::new(),
            max_retries: conf.max_retries,
            process_custom_session: on_custom,
            h2_options: None,
        }
    }

    /// Initialize the downstream modules for this proxy.
    ///
    /// This method must be called after creating an [`HttpProxy`] with [`HttpProxy::new()`]
    /// and before processing any requests. It invokes [`ProxyHttp::init_downstream_modules()`]
    /// to set up any HTTP modules configured by the user's proxy implementation.
    ///
    /// Note: When using [`http_proxy_service()`] or [`http_proxy_service_with_name()`],
    /// this method is called automatically.
    pub fn handle_init_modules(&mut self)
    where
        SV: ProxyHttp,
    {
        self.inner
            .init_downstream_modules(&mut self.downstream_modules);
    }

    async fn handle_new_request(
        &self,
        mut downstream_session: Box<HttpSession>,
    ) -> Option<Box<HttpSession>>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        // phase 1 read request header

        let res = tokio::select! {
            biased; // biased select is cheaper, and we don't want to drop already buffered requests
            res = downstream_session.read_request() => { res }
            _ = self.shutdown.notified() => {
                // service shutting down, dropping the connection to stop more req from coming in
                return None;
            }
        };
        match res {
            Ok(true) => {
                // TODO: check n==0
                debug!("Successfully get a new request");
            }
            Ok(false) => {
                return None; // TODO: close connection?
            }
            Err(mut e) => {
                e.as_down();
                error!("Fail to proxy: {e}");
                if matches!(e.etype, InvalidHTTPHeader) {
                    downstream_session
                        .respond_error(400)
                        .await
                        .unwrap_or_else(|e| {
                            error!("failed to send error response to downstream: {e}");
                        });
                } // otherwise the connection must be broken, no need to send anything
                downstream_session.shutdown().await;
                return None;
            }
        }
        trace!(
            "Request header: {:?}",
            downstream_session.req_header().as_ref()
        );
        // CONNECT method proxying is not default supported by the proxy http logic itself,
        // since the tunneling process changes the request-response flow.
        // https://datatracker.ietf.org/doc/html/rfc9110#name-connect
        // Also because the method impacts message framing in a way is currently unaccounted for
        // (https://datatracker.ietf.org/doc/html/rfc9112#section-6.3-2.2)
        // it is safest to disallow use of the method by default.
        if !self
            .server_options
            .as_ref()
            .is_some_and(|opts| opts.allow_connect_method_proxying)
            && downstream_session.req_header().method == Method::CONNECT
        {
            downstream_session
                .respond_error(405)
                .await
                .unwrap_or_else(|e| {
                    error!("failed to send error response to downstream: {e}");
                });
            downstream_session.shutdown().await;
            return None;
        }
        Some(downstream_session)
    }

    // return bool: server_session can be reused, and error if any
    async fn proxy_to_upstream(
        &self,
        session: &mut Session,
        ctx: &mut SV::CTX,
    ) -> (bool, Option<Box<Error>>)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        let peer = match self.inner.upstream_peer(session, ctx).await {
            Ok(p) => p,
            Err(e) => return (false, Some(e)),
        };

        let client_session = self.client_upstream.get_http_session(&*peer).await;
        match client_session {
            Ok((client_session, client_reused)) => {
                let (server_reused, error) = match client_session {
                    ClientSession::H1(mut h1) => {
                        let (server_reused, client_reuse, error) = self
                            .proxy_to_h1_upstream(session, &mut h1, client_reused, &peer, ctx)
                            .await;
                        if client_reuse {
                            let session = ClientSession::H1(h1);
                            self.client_upstream
                                .release_http_session(session, &*peer, peer.idle_timeout())
                                .await;
                        }
                        (server_reused, error)
                    }
                    ClientSession::H2(mut h2) => {
                        let (server_reused, mut error) = self
                            .proxy_to_h2_upstream(session, &mut h2, client_reused, &peer, ctx)
                            .await;
                        let session = ClientSession::H2(h2);
                        self.client_upstream
                            .release_http_session(session, &*peer, peer.idle_timeout())
                            .await;

                        if let Some(e) = error.as_mut() {
                            // try to downgrade if A. origin says so or B. origin sends an invalid
                            // response, which usually means origin h2 is not production ready
                            if matches!(e.etype, H2Downgrade | InvalidH2) {
                                if peer
                                    .get_alpn()
                                    .is_none_or(|alpn| alpn.get_min_http_version() == 1)
                                {
                                    // Add the peer to prefer h1 so that all following requests
                                    // will use h1
                                    self.client_upstream.prefer_h1(&*peer);
                                } else {
                                    // the peer doesn't allow downgrading to h1 (e.g. gRPC)
                                    e.retry = false.into();
                                }
                            }
                        }

                        (server_reused, error)
                    }
                    ClientSession::Custom(mut c) => {
                        let (server_reused, error) = self
                            .proxy_to_custom_upstream(session, &mut c, client_reused, &peer, ctx)
                            .await;
                        let session = ClientSession::Custom(c);
                        self.client_upstream
                            .release_http_session(session, &*peer, peer.idle_timeout())
                            .await;
                        (server_reused, error)
                    }
                };
                (
                    server_reused,
                    error.map(|e| {
                        self.inner
                            .error_while_proxy(&peer, session, e, ctx, client_reused)
                    }),
                )
            }
            Err(mut e) => {
                e.as_up();
                let new_err = self.inner.fail_to_connect(session, &peer, ctx, e);
                (false, Some(new_err.into_up()))
            }
        }
    }

    async fn upstream_filter(
        &self,
        session: &mut Session,
        task: &mut HttpTask,
        ctx: &mut SV::CTX,
    ) -> Result<Option<Duration>>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        let duration = match task {
            HttpTask::Header(header, _eos) => {
                self.inner
                    .upstream_response_filter(session, header, ctx)
                    .await?;
                None
            }
            HttpTask::Body(data, eos) | HttpTask::UpgradedBody(data, eos) => self
                .inner
                .upstream_response_body_filter(session, data, *eos, ctx)?,
            HttpTask::Trailer(Some(trailers)) => {
                self.inner
                    .upstream_response_trailer_filter(session, trailers, ctx)?;
                None
            }
            _ => {
                // task does not support a filter
                None
            }
        };

        Ok(duration)
    }

    async fn finish(
        &self,
        mut session: Session,
        ctx: &mut SV::CTX,
        reuse: bool,
        error: Option<Box<Error>>,
    ) -> Option<ReusedHttpStream>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        self.inner
            .logging(&mut session, error.as_deref(), ctx)
            .await;

        if let Some(e) = error {
            session.downstream_session.on_proxy_failure(e);
        }

        if reuse {
            // TODO: log error
            let persistent_settings = HttpPersistentSettings::for_session(&session);
            session
                .downstream_session
                .finish()
                .await
                .ok()
                .flatten()
                .map(|s| ReusedHttpStream::new(s, Some(persistent_settings)))
        } else {
            None
        }
    }

    fn cleanup_sub_req(&self, session: &mut Session) {
        if let Some(ctx) = session.subrequest_ctx.as_mut() {
            ctx.release_write_lock();
        }
    }
}

use pingora_cache::HttpCache;
use pingora_core::protocols::http::compression::ResponseCompressionCtx;

/// The established HTTP session
///
/// This object is what users interact with in order to access the request itself or change the proxy
/// behavior.
pub struct Session {
    /// the HTTP session to downstream (the client)
    pub downstream_session: Box<HttpSession>,
    /// The interface to control HTTP caching
    pub cache: HttpCache,
    /// (de)compress responses coming into the proxy (from upstream)
    pub upstream_compression: ResponseCompressionCtx,
    /// ignore downstream range (skip downstream range filters)
    pub ignore_downstream_range: bool,
    /// Were the upstream request headers modified?
    pub upstream_headers_mutated_for_cache: bool,
    /// The context from parent request, if this is a subrequest.
    pub subrequest_ctx: Option<Box<SubrequestCtx>>,
    /// Handle to allow spawning subrequests, assigned by the `Subrequest` app logic.
    pub subrequest_spawner: Option<SubrequestSpawner>,
    // Downstream filter modules
    pub downstream_modules_ctx: HttpModuleCtx,
    /// Upstream response body bytes received (payload only). Set by proxy layer.
    /// TODO: move this into an upstream session digest for future fields.
    upstream_body_bytes_received: usize,
    /// Upstream write pending time. Set by proxy layer (HTTP/1.x only).
    upstream_write_pending_time: Duration,
    /// Flag that is set when the shutdown process has begun.
    shutdown_flag: Arc<AtomicBool>,
}

impl Session {
    fn new(
        downstream_session: impl Into<Box<HttpSession>>,
        downstream_modules: &HttpModules,
        shutdown_flag: Arc<AtomicBool>,
    ) -> Self {
        Session {
            downstream_session: downstream_session.into(),
            cache: HttpCache::new(),
            // disable both upstream and downstream compression
            upstream_compression: ResponseCompressionCtx::new(0, false, false),
            ignore_downstream_range: false,
            upstream_headers_mutated_for_cache: false,
            subrequest_ctx: None,
            subrequest_spawner: None, // optionally set later on
            downstream_modules_ctx: downstream_modules.build_ctx(),
            upstream_body_bytes_received: 0,
            upstream_write_pending_time: Duration::ZERO,
            shutdown_flag,
        }
    }

    /// Create a new [Session] from the given [Stream]
    ///
    /// This function is mostly used for testing and mocking, given the downstream modules and
    /// shutdown flags will never be set.
    pub fn new_h1(stream: Stream) -> Self {
        let modules = HttpModules::new();
        Self::new(
            Box::new(HttpSession::new_http1(stream)),
            &modules,
            Arc::new(AtomicBool::new(false)),
        )
    }

    /// Create a new [Session] from the given [Stream] with modules
    ///
    /// This function is mostly used for testing and mocking, given the shutdown flag will never be
    /// set.
    pub fn new_h1_with_modules(stream: Stream, downstream_modules: &HttpModules) -> Self {
        Self::new(
            Box::new(HttpSession::new_http1(stream)),
            downstream_modules,
            Arc::new(AtomicBool::new(false)),
        )
    }

    pub fn as_downstream_mut(&mut self) -> &mut HttpSession {
        &mut self.downstream_session
    }

    pub fn as_downstream(&self) -> &HttpSession {
        &self.downstream_session
    }

    /// Write HTTP response with the given error code to the downstream.
    pub async fn respond_error(&mut self, error: u16) -> Result<()> {
        self.as_downstream_mut().respond_error(error).await
    }

    /// Write HTTP response with the given error code to the downstream with a body.
    pub async fn respond_error_with_body(&mut self, error: u16, body: Bytes) -> Result<()> {
        self.as_downstream_mut()
            .respond_error_with_body(error, body)
            .await
    }

    /// Write the given HTTP response header to the downstream
    ///
    /// Different from directly calling [HttpSession::write_response_header], this function also
    /// invokes the filter modules.
    pub async fn write_response_header(
        &mut self,
        mut resp: Box<ResponseHeader>,
        end_of_stream: bool,
    ) -> Result<()> {
        self.downstream_modules_ctx
            .response_header_filter(&mut resp, end_of_stream)
            .await?;
        self.downstream_session.write_response_header(resp).await
    }

    /// Similar to `write_response_header()`, this fn will clone the `resp` internally
    pub async fn write_response_header_ref(
        &mut self,
        resp: &ResponseHeader,
        end_of_stream: bool,
    ) -> Result<(), Box<Error>> {
        self.write_response_header(Box::new(resp.clone()), end_of_stream)
            .await
    }

    /// Write the given HTTP response body chunk to the downstream
    ///
    /// Different from directly calling [HttpSession::write_response_body], this function also
    /// invokes the filter modules.
    pub async fn write_response_body(
        &mut self,
        mut body: Option<Bytes>,
        end_of_stream: bool,
    ) -> Result<()> {
        self.downstream_modules_ctx
            .response_body_filter(&mut body, end_of_stream)?;

        if body.is_none() && !end_of_stream {
            return Ok(());
        }

        let data = body.unwrap_or_default();
        self.downstream_session
            .write_response_body(data, end_of_stream)
            .await
    }

    pub async fn write_response_tasks(&mut self, mut tasks: Vec<HttpTask>) -> Result<bool> {
        let mut seen_upgraded = self.was_upgraded();
        for task in tasks.iter_mut() {
            match task {
                HttpTask::Header(resp, end) => {
                    self.downstream_modules_ctx
                        .response_header_filter(resp, *end)
                        .await?;
                }
                HttpTask::Body(data, end) => {
                    self.downstream_modules_ctx
                        .response_body_filter(data, *end)?;
                }
                HttpTask::UpgradedBody(data, end) => {
                    seen_upgraded = true;
                    self.downstream_modules_ctx
                        .response_body_filter(data, *end)?;
                }
                HttpTask::Trailer(trailers) => {
                    if let Some(buf) = self
                        .downstream_modules_ctx
                        .response_trailer_filter(trailers)?
                    {
                        // Write the trailers into the body if the filter
                        // returns a buffer.
                        //
                        // Note, this will not work if end of stream has already
                        // been seen or we've written content-length bytes.
                        // (Trailers should never come after upgraded body)
                        *task = HttpTask::Body(Some(buf), true);
                    }
                }
                HttpTask::Done => {
                    // `Done` can be sent in certain response paths to mark end
                    // of response if not already done via trailers or body with
                    // end flag set.
                    // If the filter returns body bytes on Done,
                    // write them into the response.
                    //
                    // Note, this will not work if end of stream has already
                    // been seen or we've written content-length bytes.
                    if let Some(buf) = self.downstream_modules_ctx.response_done_filter()? {
                        if seen_upgraded {
                            *task = HttpTask::UpgradedBody(Some(buf), true);
                        } else {
                            *task = HttpTask::Body(Some(buf), true);
                        }
                    }
                }
                _ => { /* Failed */ }
            }
        }
        self.downstream_session.response_duplex_vec(tasks).await
    }

    /// Mark the upstream headers as modified by caching. This should lead to range filters being
    /// skipped when responding to the downstream.
    pub fn mark_upstream_headers_mutated_for_cache(&mut self) {
        self.upstream_headers_mutated_for_cache = true;
    }

    /// Check whether the upstream headers were marked as mutated during the request.
    pub fn upstream_headers_mutated_for_cache(&self) -> bool {
        self.upstream_headers_mutated_for_cache
    }

    /// Get the total upstream response body bytes received (payload only) recorded by the proxy layer.
    pub fn upstream_body_bytes_received(&self) -> usize {
        self.upstream_body_bytes_received
    }

    /// Set the total upstream response body bytes received (payload only). Intended for internal use by proxy layer.
    pub(crate) fn set_upstream_body_bytes_received(&mut self, n: usize) {
        self.upstream_body_bytes_received = n;
    }

    /// Get the upstream write pending time recorded by the proxy layer. Returns [`Duration::ZERO`] for HTTP/2.
    pub fn upstream_write_pending_time(&self) -> Duration {
        self.upstream_write_pending_time
    }

    /// Set the upstream write pending time. Intended for internal use by proxy layer.
    pub(crate) fn set_upstream_write_pending_time(&mut self, d: Duration) {
        self.upstream_write_pending_time = d;
    }

    /// Is the proxy process in the process of shutting down (e.g. due to graceful upgrade)?
    pub fn is_process_shutting_down(&self) -> bool {
        self.shutdown_flag.load(Ordering::Acquire)
    }

    pub fn downstream_custom_message(
        &mut self,
    ) -> Result<
        Option<Box<dyn futures::Stream<Item = Result<Bytes>> + Unpin + Send + Sync + 'static>>,
    > {
        if let Some(custom_session) = self.downstream_session.as_custom_mut() {
            custom_session
                .take_custom_message_reader()
                .map(Some)
                .ok_or(Error::explain(
                    ReadError,
                    "can't extract custom reader from downstream",
                ))
        } else {
            Ok(None)
        }
    }
}

impl AsRef<HttpSession> for Session {
    fn as_ref(&self) -> &HttpSession {
        &self.downstream_session
    }
}

impl AsMut<HttpSession> for Session {
    fn as_mut(&mut self) -> &mut HttpSession {
        &mut self.downstream_session
    }
}

use std::ops::{Deref, DerefMut};

impl Deref for Session {
    type Target = HttpSession;

    fn deref(&self) -> &Self::Target {
        &self.downstream_session
    }
}

impl DerefMut for Session {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.downstream_session
    }
}

// generic HTTP 502 response sent when proxy_upstream_filter refuses to connect to upstream
static BAD_GATEWAY: Lazy<ResponseHeader> = Lazy::new(|| {
    let mut resp = ResponseHeader::build(http::StatusCode::BAD_GATEWAY, Some(3)).unwrap();
    resp.insert_header(header::SERVER, &SERVER_NAME[..])
        .unwrap();
    resp.insert_header(header::CONTENT_LENGTH, 0).unwrap();
    resp.insert_header(header::CACHE_CONTROL, "private, no-store")
        .unwrap();

    resp
});

impl<SV, C> HttpProxy<SV, C>
where
    C: custom::Connector,
{
    async fn process_request(
        self: &Arc<Self>,
        mut session: Session,
        mut ctx: <SV as ProxyHttp>::CTX,
    ) -> Option<ReusedHttpStream>
    where
        SV: ProxyHttp + Send + Sync + 'static,
        <SV as ProxyHttp>::CTX: Send + Sync,
    {
        if let Err(e) = self
            .inner
            .early_request_filter(&mut session, &mut ctx)
            .await
        {
            return self
                .handle_error(session, &mut ctx, e, "Fail to early filter request:")
                .await;
        }

        if self.inner.allow_spawning_subrequest(&session, &ctx) {
            session.subrequest_spawner = Some(SubrequestSpawner::new(self.clone()));
        }

        let req = session.downstream_session.req_header_mut();

        // Built-in downstream request filters go first
        if let Err(e) = session
            .downstream_modules_ctx
            .request_header_filter(req)
            .await
        {
            return self
                .handle_error(
                    session,
                    &mut ctx,
                    e,
                    "Failed in downstream modules request filter:",
                )
                .await;
        }

        match self.inner.request_filter(&mut session, &mut ctx).await {
            Ok(response_sent) => {
                if response_sent {
                    // TODO: log error
                    self.inner.logging(&mut session, None, &mut ctx).await;
                    self.cleanup_sub_req(&mut session);
                    let persistent_settings = HttpPersistentSettings::for_session(&session);
                    return session
                        .downstream_session
                        .finish()
                        .await
                        .ok()
                        .flatten()
                        .map(|s| ReusedHttpStream::new(s, Some(persistent_settings)));
                }
                /* else continue */
            }
            Err(e) => {
                return self
                    .handle_error(session, &mut ctx, e, "Fail to filter request:")
                    .await;
            }
        }

        if let Some((reuse, err)) = self.proxy_cache(&mut session, &mut ctx).await {
            // cache hit
            return self.finish(session, &mut ctx, reuse, err).await;
        }
        // either uncacheable, or cache miss

        // there should not be a write lock in the sub req ctx after this point
        self.cleanup_sub_req(&mut session);

        // decide if the request is allowed to go to upstream
        match self
            .inner
            .proxy_upstream_filter(&mut session, &mut ctx)
            .await
        {
            Ok(proxy_to_upstream) => {
                if !proxy_to_upstream {
                    // The hook can choose to write its own response, but if it doesn't, we respond
                    // with a generic 502
                    if session.cache.enabled() {
                        // drop the cache lock that this request may be holding onto
                        session.cache.disable(NoCacheReason::DeclinedToUpstream);
                    }
                    if session.response_written().is_none() {
                        match session.write_response_header_ref(&BAD_GATEWAY, true).await {
                            Ok(()) => {}
                            Err(e) => {
                                return self
                                    .handle_error(
                                        session,
                                        &mut ctx,
                                        e,
                                        "Error responding with Bad Gateway:",
                                    )
                                    .await;
                            }
                        }
                    }

                    return self.finish(session, &mut ctx, true, None).await;
                }
                /* else continue */
            }
            Err(e) => {
                if session.cache.enabled() {
                    session.cache.disable(NoCacheReason::InternalError);
                }

                return self
                    .handle_error(
                        session,
                        &mut ctx,
                        e,
                        "Error deciding if we should proxy to upstream:",
                    )
                    .await;
            }
        }

        let mut retries: usize = 0;

        let mut server_reuse = false;
        let mut proxy_error: Option<Box<Error>> = None;

        while retries < self.max_retries {
            retries += 1;

            let (reuse, e) = self.proxy_to_upstream(&mut session, &mut ctx).await;
            server_reuse = reuse;

            match e {
                Some(error) => {
                    let retry = error.retry();
                    proxy_error = Some(error);
                    if !retry {
                        break;
                    }
                    // only log error that will be retried here, the final error will be logged below
                    warn!(
                        "Fail to proxy: {}, tries: {}, retry: {}, {}",
                        proxy_error.as_ref().unwrap(),
                        retries,
                        retry,
                        self.inner.request_summary(&session, &ctx)
                    );
                }
                None => {
                    proxy_error = None;
                    break;
                }
            };
        }

        // serve stale if error
        // Check both error and cache before calling the function because await is not cheap
        // allow unwrap until if let chains
        #[allow(clippy::unnecessary_unwrap)]
        let serve_stale_result = if proxy_error.is_some() && session.cache.can_serve_stale_error() {
            self.handle_stale_if_error(&mut session, &mut ctx, proxy_error.as_ref().unwrap())
                .await
        } else {
            None
        };

        let final_error = if let Some((reuse, stale_cache_error)) = serve_stale_result {
            // don't reuse server conn if serve stale polluted it
            server_reuse = server_reuse && reuse;
            stale_cache_error
        } else {
            proxy_error
        };

        if let Some(e) = final_error.as_ref() {
            // If we have errored and are still holding a cache lock, release it.
            if session.cache.enabled() {
                let reason = if *e.esource() == ErrorSource::Upstream {
                    NoCacheReason::UpstreamError
                } else {
                    NoCacheReason::InternalError
                };
                session.cache.disable(reason);
            }
            let res = self.inner.fail_to_proxy(&mut session, e, &mut ctx).await;

            // final error will have > 0 status unless downstream connection is dead
            if !self.inner.suppress_error_log(&session, &ctx, e) {
                error!(
                    "Fail to proxy: {}, status: {}, tries: {}, retry: {}, {}",
                    final_error.as_ref().unwrap(),
                    res.error_code,
                    retries,
                    false, // we never retry here
                    self.inner.request_summary(&session, &ctx),
                );
            }
        }

        // logging() will be called in finish()
        self.finish(session, &mut ctx, server_reuse, final_error)
            .await
    }

    async fn handle_error(
        &self,
        mut session: Session,
        ctx: &mut <SV as ProxyHttp>::CTX,
        e: Box<Error>,
        context: &str,
    ) -> Option<ReusedHttpStream>
    where
        SV: ProxyHttp + Send + Sync + 'static,
        <SV as ProxyHttp>::CTX: Send + Sync,
    {
        let res = self.inner.fail_to_proxy(&mut session, &e, ctx).await;
        if !self.inner.suppress_error_log(&session, ctx, &e) {
            error!(
                "{context} {}, status: {}, {}",
                e,
                res.error_code,
                self.inner.request_summary(&session, ctx)
            );
        }
        self.inner.logging(&mut session, Some(&e), ctx).await;
        self.cleanup_sub_req(&mut session);

        session.downstream_session.on_proxy_failure(e);

        if res.can_reuse_downstream {
            let persistent_settings = HttpPersistentSettings::for_session(&session);
            session
                .downstream_session
                .finish()
                .await
                .ok()
                .flatten()
                .map(|s| ReusedHttpStream::new(s, Some(persistent_settings)))
        } else {
            None
        }
    }
}

/* Make process_subrequest() a trait to workaround https://github.com/rust-lang/rust/issues/78649
   if process_subrequest() is implemented as a member of HttpProxy, rust complains

error[E0391]: cycle detected when computing type of `proxy_cache::<impl at pingora-proxy/src/proxy_cache.rs:7:1: 7:23>::proxy_cache::{opaque#0}`
   --> pingora-proxy/src/proxy_cache.rs:13:10
    |
13  |     ) -> Option<(bool, Option<Box<Error>>)>

*/
#[async_trait]
pub trait Subrequest {
    async fn process_subrequest(
        self: Arc<Self>,
        session: Box<HttpSession>,
        sub_req_ctx: Box<SubrequestCtx>,
    );
}

#[async_trait]
impl<SV, C> Subrequest for HttpProxy<SV, C>
where
    SV: ProxyHttp + Send + Sync + 'static,
    <SV as ProxyHttp>::CTX: Send + Sync,
    C: custom::Connector,
{
    async fn process_subrequest(
        self: Arc<Self>,
        session: Box<HttpSession>,
        sub_req_ctx: Box<SubrequestCtx>,
    ) {
        debug!("starting subrequest");

        let mut session = match self.handle_new_request(session).await {
            Some(downstream_session) => Session::new(
                downstream_session,
                &self.downstream_modules,
                self.shutdown_flag.clone(),
            ),
            None => return, // bad request
        };

        // no real downstream to keepalive, but it doesn't matter what is set here because at the end
        // of this fn the dummy connection will be dropped
        session.set_keepalive(None);

        session.subrequest_ctx.replace(sub_req_ctx);
        trace!("processing subrequest");
        let ctx = self.inner.new_ctx();
        self.process_request(session, ctx).await;
        trace!("subrequest done");
    }
}

/// A handle to the underlying HTTP proxy app that allows spawning subrequests.
pub struct SubrequestSpawner {
    app: Arc<dyn Subrequest + Send + Sync>,
}

/// A [`PreparedSubrequest`] that is ready to run.
pub struct PreparedSubrequest {
    app: Arc<dyn Subrequest + Send + Sync>,
    session: Box<HttpSession>,
    sub_req_ctx: Box<SubrequestCtx>,
}

impl PreparedSubrequest {
    pub async fn run(self) {
        self.app
            .process_subrequest(self.session, self.sub_req_ctx)
            .await
    }

    pub fn session(&self) -> &HttpSession {
        self.session.as_ref()
    }

    pub fn session_mut(&mut self) -> &mut HttpSession {
        self.session.deref_mut()
    }
}

impl SubrequestSpawner {
    /// Create a new [`SubrequestSpawner`].
    pub fn new(app: Arc<dyn Subrequest + Send + Sync>) -> SubrequestSpawner {
        SubrequestSpawner { app }
    }

    /// Spawn a background subrequest and return a join handle.
    // TODO: allow configuring the subrequest session before use
    pub fn spawn_background_subrequest(
        &self,
        session: &HttpSession,
        ctx: SubrequestCtx,
    ) -> tokio::task::JoinHandle<()> {
        let new_app = self.app.clone(); // Clone the Arc
        let (mut session, handle) = subrequest::create_session(session);
        if ctx.body_mode() == BodyMode::NoBody {
            session
                .as_subrequest_mut()
                .expect("created subrequest session")
                .clear_request_body_headers();
        }
        let sub_req_ctx = Box::new(ctx);
        handle.drain_tasks();
        tokio::spawn(async move {
            new_app
                .process_subrequest(Box::new(session), sub_req_ctx)
                .await;
        })
    }

    /// Create a subrequest that listens to `HttpTask`s sent from the returned `Sender`
    /// and sends `HttpTask`s to the returned `Receiver`.
    ///
    /// To run that subrequest, call `run()`.
    // TODO: allow configuring the subrequest session before use
    pub fn create_subrequest(
        &self,
        session: &HttpSession,
        ctx: SubrequestCtx,
    ) -> (PreparedSubrequest, SubrequestHandle) {
        let new_app = self.app.clone(); // Clone the Arc
        let (mut session, handle) = subrequest::create_session(session);
        if ctx.body_mode() == BodyMode::NoBody {
            session
                .as_subrequest_mut()
                .expect("created subrequest session")
                .clear_request_body_headers();
        }
        let sub_req_ctx = Box::new(ctx);
        (
            PreparedSubrequest {
                app: new_app,
                session: Box::new(session),
                sub_req_ctx,
            },
            handle,
        )
    }
}

#[async_trait]
impl<SV, C> HttpServerApp for HttpProxy<SV, C>
where
    SV: ProxyHttp + Send + Sync + 'static,
    <SV as ProxyHttp>::CTX: Send + Sync,
    C: custom::Connector,
{
    async fn process_new_http(
        self: &Arc<Self>,
        session: HttpSession,
        shutdown: &ShutdownWatch,
    ) -> Option<ReusedHttpStream> {
        let session = Box::new(session);

        // TODO: keepalive pool, use stack
        let mut session = match self.handle_new_request(session).await {
            Some(downstream_session) => Session::new(
                downstream_session,
                &self.downstream_modules,
                self.shutdown_flag.clone(),
            ),
            None => return None, // bad request
        };

        if *shutdown.borrow() {
            // stop downstream from reusing if this service is shutting down soon
            session.set_keepalive(None);
        }

        let ctx = self.inner.new_ctx();
        self.process_request(session, ctx).await
    }

    async fn http_cleanup(&self) {
        self.shutdown_flag.store(true, Ordering::Release);
        // Notify all keepalived requests blocking on read_request() to abort
        self.shutdown.notify_waiters();
    }

    fn server_options(&self) -> Option<&HttpServerOptions> {
        self.server_options.as_ref()
    }

    fn h2_options(&self) -> Option<H2Options> {
        self.h2_options.clone()
    }
    async fn process_custom_session(
        self: Arc<Self>,
        stream: Stream,
        shutdown: &ShutdownWatch,
    ) -> Option<Stream> {
        let app = self.clone();

        let Some(process_custom_session) = app.process_custom_session.as_ref() else {
            warn!("custom was called on an empty on_custom");
            return None;
        };

        process_custom_session(self.clone(), stream, shutdown).await
    }

    // TODO implement h2_options
}

use pingora_core::services::listening::Service;

/// Create an [`HttpProxy`] without wrapping it in a [`Service`].
///
/// This is useful when you need to integrate `HttpProxy` into a custom accept loop,
/// for example when implementing SNI-based routing that decides between TLS passthrough
/// and TLS termination on a single port.
///
/// The returned `HttpProxy` is fully initialized and ready to process requests via
/// [`HttpServerApp::process_new_http()`].
///
/// # Example
///
/// ```ignore
/// use pingora_proxy::http_proxy;
/// use std::sync::Arc;
///
/// // Create the proxy
/// let proxy = Arc::new(http_proxy(&server_conf, my_proxy_app));
///
/// // In your custom accept loop:
/// loop {
///     let (stream, addr) = listener.accept().await?;
///
///     // Peek SNI, decide routing...
///     if should_terminate_tls {
///         let tls_stream = my_acceptor.accept(stream).await?;
///         let session = HttpSession::new_http1(Box::new(tls_stream));
///         proxy.process_new_http(session, &shutdown).await;
///     }
/// }
/// ```
pub fn http_proxy<SV>(conf: &Arc<ServerConf>, inner: SV) -> HttpProxy<SV>
where
    SV: ProxyHttp,
{
    let mut proxy = HttpProxy::new(inner, conf.clone());
    proxy.handle_init_modules();
    proxy
}

/// Create a [Service] from the user implemented [ProxyHttp].
///
/// The returned [Service] can be hosted by a [pingora_core::server::Server] directly.
pub fn http_proxy_service<SV>(conf: &Arc<ServerConf>, inner: SV) -> Service<HttpProxy<SV, ()>>
where
    SV: ProxyHttp,
{
    http_proxy_service_with_name(conf, inner, "Pingora HTTP Proxy Service")
}

/// Create a [Service] from the user implemented [ProxyHttp].
///
/// The returned [Service] can be hosted by a [pingora_core::server::Server] directly.
pub fn http_proxy_service_with_name<SV>(
    conf: &Arc<ServerConf>,
    inner: SV,
    name: &str,
) -> Service<HttpProxy<SV, ()>>
where
    SV: ProxyHttp,
{
    let mut proxy = HttpProxy::new(inner, conf.clone());
    proxy.handle_init_modules();
    Service::new(name.to_string(), proxy)
}

/// Create a [Service] from the user implemented [ProxyHttp].
///
/// The returned [Service] can be hosted by a [pingora_core::server::Server] directly.
pub fn http_proxy_service_with_name_custom<SV, C>(
    conf: &Arc<ServerConf>,
    inner: SV,
    name: &str,
    connector: C,
    on_custom: ProcessCustomSession<SV, C>,
) -> Service<HttpProxy<SV, C>>
where
    SV: ProxyHttp + Send + Sync + 'static,
    SV::CTX: Send + Sync + 'static,
    C: custom::Connector,
{
    let mut proxy = HttpProxy::new_custom(inner, conf.clone(), connector, Some(on_custom), None);
    proxy.handle_init_modules();

    Service::new(name.to_string(), proxy)
}

/// A builder for a [Service] that can be used to create a [HttpProxy] instance
///
/// The [ProxyServiceBuilder] can be used to construct a [HttpProxy] service with a custom name,
/// connector, and custom session handler.
///
pub struct ProxyServiceBuilder<SV, C>
where
    SV: ProxyHttp + Send + Sync + 'static,
    SV::CTX: Send + Sync + 'static,
    C: custom::Connector,
{
    conf: Arc<ServerConf>,
    inner: SV,
    name: String,
    connector: C,
    custom: Option<ProcessCustomSession<SV, C>>,
    server_options: Option<HttpServerOptions>,
}

impl<SV> ProxyServiceBuilder<SV, ()>
where
    SV: ProxyHttp + Send + Sync + 'static,
    SV::CTX: Send + Sync + 'static,
{
    /// Create a new [ProxyServiceBuilder] with the given [ServerConf] and [ProxyHttp]
    /// implementation.
    ///
    /// The returned builder can be used to construct a [HttpProxy] service with a custom name,
    /// connector, and custom session handler.
    ///
    /// The [ProxyServiceBuilder] will default to using the [ProxyHttp] implementation and no custom
    /// session handler.
    ///
    pub fn new(conf: &Arc<ServerConf>, inner: SV) -> Self {
        ProxyServiceBuilder {
            conf: conf.clone(),
            inner,
            name: "Pingora HTTP Proxy Service".into(),
            connector: (),
            custom: None,
            server_options: None,
        }
    }
}

impl<SV, C> ProxyServiceBuilder<SV, C>
where
    SV: ProxyHttp + Send + Sync + 'static,
    SV::CTX: Send + Sync + 'static,
    C: custom::Connector,
{
    /// Sets the name of the [HttpProxy] service.
    pub fn name(mut self, name: impl AsRef<str>) -> Self {
        self.name = name.as_ref().to_owned();
        self
    }

    /// Set a custom connector and custom session handler for the [ProxyServiceBuilder].
    ///
    /// The custom connector is used to establish a connection to the upstream server.
    ///
    /// The custom session handler is used to handle custom protocol specific logic
    /// between the proxy and the upstream server.
    ///
    /// Returns a new [ProxyServiceBuilder] with the custom connector and session handler.
    pub fn custom<C2: custom::Connector>(
        self,
        connector: C2,
        on_custom: ProcessCustomSession<SV, C2>,
    ) -> ProxyServiceBuilder<SV, C2> {
        let Self {
            conf,
            inner,
            name,
            server_options,
            ..
        } = self;
        ProxyServiceBuilder {
            conf,
            inner,
            name,
            connector,
            custom: Some(on_custom),
            server_options,
        }
    }

    /// Set the server options for the [ProxyServiceBuilder].
    ///
    /// Returns a new [ProxyServiceBuilder] with the server options set.
    pub fn server_options(mut self, options: HttpServerOptions) -> Self {
        self.server_options = Some(options);
        self
    }

    /// Builds a new [Service] from the [ProxyServiceBuilder].
    ///
    /// This function takes ownership of the [ProxyServiceBuilder] and returns a new [Service] with
    /// a fully initialized [HttpProxy].
    ///
    /// The returned [Service] is ready to be used by a [pingora_core::server::Server].
    pub fn build(self) -> Service<HttpProxy<SV, C>> {
        let Self {
            conf,
            inner,
            name,
            connector,
            custom,
            server_options,
        } = self;

        let mut proxy = HttpProxy::new_custom(inner, conf, connector, custom, server_options);

        proxy.handle_init_modules();
        Service::new(name, proxy)
    }
}


================================================
FILE: pingora-proxy/src/proxy_cache.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::*;
use http::header::{CONTENT_ENCODING, CONTENT_LENGTH, CONTENT_TYPE, TRANSFER_ENCODING};
use http::{Method, StatusCode};
use pingora_cache::key::CacheHashKey;
use pingora_cache::lock::LockStatus;
use pingora_cache::max_file_size::ERR_RESPONSE_TOO_LARGE;
use pingora_cache::{ForcedFreshness, HitHandler, HitStatus, RespCacheable::*};
use pingora_core::protocols::http::conditional_filter::to_304;
use pingora_core::protocols::http::v1::common::header_value_content_length;
use pingora_core::ErrorType;
use range_filter::RangeBodyFilter;
use std::time::SystemTime;

impl<SV, C> HttpProxy<SV, C>
where
    C: custom::Connector,
{
    // return bool: server_session can be reused, and error if any
    pub(crate) async fn proxy_cache(
        self: &Arc<Self>,
        session: &mut Session,
        ctx: &mut SV::CTX,
    ) -> Option<(bool, Option<Box<Error>>)>
    // None: continue to proxy, Some: return
    where
        SV: ProxyHttp + Send + Sync + 'static,
        SV::CTX: Send + Sync,
    {
        // Cache logic request phase
        if let Err(e) = self.inner.request_cache_filter(session, ctx) {
            // TODO: handle this error
            warn!(
                "Fail to request_cache_filter: {e}, {}",
                self.inner.request_summary(session, ctx)
            );
        }

        // cache key logic, should this be part of request_cache_filter?
        if session.cache.enabled() {
            match self.inner.cache_key_callback(session, ctx) {
                Ok(key) => {
                    session.cache.set_cache_key(key);
                }
                Err(e) => {
                    // TODO: handle this error
                    session.cache.disable(NoCacheReason::StorageError);
                    warn!(
                        "Fail to cache_key_callback: {e}, {}",
                        self.inner.request_summary(session, ctx)
                    );
                }
            }
        }

        // cache purge logic: PURGE short-circuits rest of request
        if self.inner.is_purge(session, ctx) {
            return self.proxy_purge(session, ctx).await;
        }

        // bypass cache lookup if we predict to be uncacheable
        if session.cache.enabled() && !session.cache.cacheable_prediction() {
            session.cache.bypass();
        }

        if !session.cache.enabled() {
            return None;
        }

        // cache lookup logic
        loop {
            // for cache lock, TODO: cap the max number of loops
            match session.cache.cache_lookup().await {
                Ok(res) => {
                    let mut hit_status_opt = None;
                    if let Some((mut meta, mut handler)) = res {
                        // Vary logic
                        // Because this branch can be called multiple times in a loop, and we only
                        // need to update the vary once, check if variance is already set to
                        // prevent unnecessary vary lookups.
                        let cache_key = session.cache.cache_key();
                        if let Some(variance) = cache_key.variance_bin() {
                            // We've looked up a secondary slot.
                            // Adhoc double check that the variance found is the variance we want.
                            if Some(variance) != meta.variance() {
                                warn!("Cache variance mismatch, {variance:?}, {cache_key:?}");
                                session.cache.disable(NoCacheReason::InternalError);
                                break None;
                            }
                        } else {
                            // Basic cache key; either variance is off, or this is the primary slot.
                            let req_header = session.req_header();
                            let variance = self.inner.cache_vary_filter(&meta, ctx, req_header);
                            if let Some(variance) = variance {
                                // Variance is on. This is the primary slot.
                                if !session.cache.cache_vary_lookup(variance, &meta) {
                                    // This wasn't the desired variant. Updated cache key variance, cause another
                                    // lookup to get the desired variant, which would be in a secondary slot.
                                    continue;
                                }
                            } // else: vary is not in use
                        }

                        // Either no variance, or the current handler targets the correct variant.

                        // hit
                        // TODO: maybe round and/or cache now()
                        let is_fresh = meta.is_fresh(SystemTime::now());
                        // check if we should force expire or force miss
                        let hit_status = match self
                            .inner
                            .cache_hit_filter(session, &meta, &mut handler, is_fresh, ctx)
                            .await
                        {
                            Err(e) => {
                                error!(
                                    "Failed to filter cache hit: {e}, {}",
                                    self.inner.request_summary(session, ctx)
                                );
                                // this return value will cause us to fetch from upstream
                                HitStatus::FailedHitFilter
                            }
                            Ok(None) => {
                                if is_fresh {
                                    HitStatus::Fresh
                                } else {
                                    HitStatus::Expired
                                }
                            }
                            Ok(Some(ForcedFreshness::ForceExpired)) => {
                                // force expired asset should not be serve as stale
                                // because force expire is usually to remove data
                                meta.disable_serve_stale();
                                HitStatus::ForceExpired
                            }
                            Ok(Some(ForcedFreshness::ForceMiss)) => HitStatus::ForceMiss,
                            Ok(Some(ForcedFreshness::ForceFresh)) => HitStatus::Fresh,
                        };

                        hit_status_opt = Some(hit_status);

                        // init cache for hit / stale
                        session.cache.cache_found(meta, handler, hit_status);
                    }

                    if hit_status_opt.is_none_or(HitStatus::is_treated_as_miss) {
                        // cache miss
                        if session.cache.is_cache_locked() {
                            // Another request is filling the cache; try waiting til that's done and retry.
                            let lock_status = session.cache.cache_lock_wait().await;
                            if self.handle_lock_status(session, ctx, lock_status) {
                                continue;
                            } else {
                                break None;
                            }
                        } else {
                            self.inner.cache_miss(session, ctx);
                            break None;
                        }
                    }

                    // Safe because an empty hit status would have broken out
                    // in the block above
                    let hit_status = hit_status_opt.expect("None case handled as miss");

                    if !hit_status.is_fresh() {
                        // expired or force expired asset
                        if session.cache.is_cache_locked() {
                            // first if this is the sub request for the background cache update
                            if let Some(write_lock) = session
                                .subrequest_ctx
                                .as_mut()
                                .and_then(|ctx| ctx.take_write_lock())
                            {
                                // Put the write lock in the request
                                session.cache.set_write_lock(write_lock);
                                session.cache.tag_as_subrequest();
                                // and then let it go to upstream
                                break None;
                            }
                            let will_serve_stale = session.cache.can_serve_stale_updating()
                                && self.inner.should_serve_stale(session, ctx, None);
                            if !will_serve_stale {
                                let lock_status = session.cache.cache_lock_wait().await;
                                if self.handle_lock_status(session, ctx, lock_status) {
                                    continue;
                                } else {
                                    break None;
                                }
                            }
                            // else continue to serve stale
                            session.cache.set_stale_updating();
                        } else if session.cache.is_cache_lock_writer() {
                            // stale while revalidate logic for the writer
                            let will_serve_stale = session.cache.can_serve_stale_updating()
                                && self.inner.should_serve_stale(session, ctx, None);
                            if will_serve_stale {
                                // create a background thread to do the actual update
                                // the subrequest handle is only None by this phase in unit tests
                                // that don't go through process_new_http
                                let (permit, cache_lock) = session.cache.take_write_lock();
                                SubrequestSpawner::new(self.clone()).spawn_background_subrequest(
                                    session.as_ref(),
                                    subrequest::Ctx::builder()
                                        .cache_write_lock(
                                            cache_lock,
                                            session.cache.cache_key().clone(),
                                            permit,
                                        )
                                        .build(),
                                );
                                // continue to serve stale for this request
                                session.cache.set_stale_updating();
                            } else {
                                // return to fetch from upstream
                                break None;
                            }
                        } else {
                            // return to fetch from upstream
                            break None;
                        }
                    }

                    let (reuse, err) = self.proxy_cache_hit(session, ctx).await;
                    if let Some(e) = err.as_ref() {
                        error!(
                            "Fail to serve cache: {e}, {}",
                            self.inner.request_summary(session, ctx)
                        );
                    }
                    // responses is served from cache, exit
                    break Some((reuse, err));
                }
                Err(e) => {
                    // Allow cache miss to fill cache even if cache lookup errors
                    // this is mostly to support backward incompatible metadata update
                    // TODO: check error types
                    // session.cache.disable();
                    self.inner.cache_miss(session, ctx);
                    warn!(
                        "Fail to cache lookup: {e}, {}",
                        self.inner.request_summary(session, ctx)
                    );
                    break None;
                }
            }
        }
    }

    // return bool: server_session can be reused, and error if any
    pub(crate) async fn proxy_cache_hit(
        &self,
        session: &mut Session,
        ctx: &mut SV::CTX,
    ) -> (bool, Option<Box<Error>>)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        use range_filter::*;

        let seekable = session.cache.hit_handler().can_seek();
        let mut header = cache_hit_header(&session.cache);

        let req = session.req_header();

        let not_modified = match self.inner.cache_not_modified_filter(session, &header, ctx) {
            Ok(not_modified) => not_modified,
            Err(e) => {
                // fail open if cache_not_modified_filter errors,
                // just return the whole original response
                warn!(
                    "Failed to run cache not modified filter: {e}, {}",
                    self.inner.request_summary(session, ctx)
                );
                false
            }
        };
        if not_modified {
            to_304(&mut header);
        }
        let header_only = not_modified || req.method == http::method::Method::HEAD;

        // process range header if the cache storage supports seek
        let range_type = if seekable && !session.ignore_downstream_range {
            self.inner.range_header_filter(session, &mut header, ctx)
        } else {
            RangeType::None
        };

        // return a 416 with an empty body for simplicity
        let header_only = header_only || matches!(range_type, RangeType::Invalid);
        debug!("header: {header:?}");

        // TODO: use ProxyUseCache to replace the logic below
        match self.inner.response_filter(session, &mut header, ctx).await {
            Ok(_) => {
                if let Err(e) = session
                    .downstream_modules_ctx
                    .response_header_filter(&mut header, header_only)
                    .await
                {
                    error!(
                        "Failed to run downstream modules response header filter in hit: {e}, {}",
                        self.inner.request_summary(session, ctx)
                    );
                    session
                        .as_mut()
                        .respond_error(500)
                        .await
                        .unwrap_or_else(|e| {
                            error!("failed to send error response to downstream: {e}");
                        });
                    // we have not write anything dirty to downstream, it is still reusable
                    return (true, Some(e));
                }

                if let Err(e) = session
                    .as_mut()
                    .write_response_header(header)
                    .await
                    .map_err(|e| e.into_down())
                {
                    // downstream connection is bad already
                    return (false, Some(e));
                }
            }
            Err(e) => {
                error!(
                    "Failed to run response filter in hit: {e}, {}",
                    self.inner.request_summary(session, ctx)
                );
                session
                    .as_mut()
                    .respond_error(500)
                    .await
                    .unwrap_or_else(|e| {
                        error!("failed to send error response to downstream: {e}");
                    });
                // we have not write anything dirty to downstream, it is still reusable
                return (true, Some(e));
            }
        }
        debug!("finished sending cached header to downstream");

        // If the function returns an Err, there was an issue seeking from the hit handler.
        //
        // Returning false means that no seeking or state change was done, either because the
        // hit handler doesn't support the seek or because multipart doesn't apply.
        fn seek_multipart(
            hit_handler: &mut HitHandler,
            range_filter: &mut RangeBodyFilter,
        ) -> Result<bool> {
            if !range_filter.is_multipart_range() || !hit_handler.can_seek_multipart() {
                return Ok(false);
            }
            let r = range_filter.next_cache_multipart_range();
            hit_handler.seek_multipart(r.start, Some(r.end))?;
            // we still need RangeBodyFilter's help to transform the byte
            // range into a multipart response.
            range_filter.set_current_cursor(r.start);
            Ok(true)
        }

        if !header_only {
            let mut maybe_range_filter = match &range_type {
                RangeType::Single(r) => {
                    if session.cache.hit_handler().can_seek() {
                        if let Err(e) = session.cache.hit_handler().seek(r.start, Some(r.end)) {
                            return (false, Some(e));
                        }
                        None
                    } else {
                        Some(RangeBodyFilter::new_range(range_type.clone()))
                    }
                }
                RangeType::Multi(_) => {
                    let mut range_filter = RangeBodyFilter::new_range(range_type.clone());
                    if let Err(e) = seek_multipart(session.cache.hit_handler(), &mut range_filter) {
                        return (false, Some(e));
                    }
                    Some(range_filter)
                }
                RangeType::Invalid => unreachable!(),
                RangeType::None => None,
            };
            loop {
                match session.cache.hit_handler().read_body().await {
                    Ok(raw_body) => {
                        let end = raw_body.is_none();

                        if end {
                            if let Some(range_filter) = maybe_range_filter.as_mut() {
                                if range_filter.should_cache_seek_again() {
                                    let e = match seek_multipart(
                                        session.cache.hit_handler(),
                                        range_filter,
                                    ) {
                                        Ok(true) => {
                                            // called seek(), read again
                                            continue;
                                        }
                                        Ok(false) => {
                                            // body reader can no longer seek multipart,
                                            // but cache wants to continue seeking
                                            // the body will just end in this case if we pass the
                                            // None through
                                            // (TODO: how might hit handlers want to recover from
                                            // this situation)?
                                            Error::explain(
                                                InternalError,
                                                "hit handler cannot seek for multipart again",
                                            )
                                            // the body will just end in this case.
                                        }
                                        Err(e) => e,
                                    };
                                    return (false, Some(e));
                                }
                            }
                        }

                        let mut body = if let Some(range_filter) = maybe_range_filter.as_mut() {
                            range_filter.filter_body(raw_body)
                        } else {
                            raw_body
                        };

                        match self
                            .inner
                            .response_body_filter(session, &mut body, end, ctx)
                        {
                            Ok(Some(duration)) => {
                                trace!("delaying response for {duration:?}");
                                time::sleep(duration).await;
                            }
                            Ok(None) => { /* continue */ }
                            Err(e) => {
                                // body is being sent, don't treat downstream as reusable
                                return (false, Some(e));
                            }
                        }

                        if let Err(e) = session
                            .downstream_modules_ctx
                            .response_body_filter(&mut body, end)
                        {
                            // body is being sent, don't treat downstream as reusable
                            return (false, Some(e));
                        }

                        if !end && body.as_ref().is_none_or(|b| b.is_empty()) {
                            // Don't write empty body which will end session,
                            // still more hit handler bytes to read
                            continue;
                        }

                        // write to downstream
                        let b = body.unwrap_or_default();
                        if let Err(e) = session
                            .as_mut()
                            .write_response_body(b, end)
                            .await
                            .map_err(|e| e.into_down())
                        {
                            return (false, Some(e));
                        }
                        if end {
                            break;
                        }
                    }
                    Err(e) => return (false, Some(e)),
                }
            }
        }

        if let Err(e) = session.cache.finish_hit_handler().await {
            warn!("Error during finish_hit_handler: {}", e);
        }

        match session.as_mut().finish_body().await {
            Ok(_) => {
                debug!("finished sending cached body to downstream");
                (true, None)
            }
            Err(e) => (false, Some(e)),
        }
    }

    /* Downstream revalidation, only needed when cache is on because otherwise origin
     * will handle it */
    pub(crate) fn downstream_response_conditional_filter(
        &self,
        use_cache: &mut ServeFromCache,
        session: &Session,
        resp: &mut ResponseHeader,
        ctx: &mut SV::CTX,
    ) where
        SV: ProxyHttp,
    {
        // TODO: range
        let req = session.req_header();

        let not_modified = match self.inner.cache_not_modified_filter(session, resp, ctx) {
            Ok(not_modified) => not_modified,
            Err(e) => {
                // fail open if cache_not_modified_filter errors,
                // just return the whole original response
                warn!(
                    "Failed to run cache not modified filter: {e}, {}",
                    self.inner.request_summary(session, ctx)
                );
                false
            }
        };

        if not_modified {
            to_304(resp);
        }
        let header_only = not_modified || req.method == http::method::Method::HEAD;
        if header_only && use_cache.is_on() {
            // tell cache to stop serving downstream after yielding header
            // (misses will continue to allow admitting upstream into cache)
            use_cache.enable_header_only();
        }
    }

    // TODO: cache upstream header filter to add/remove headers

    pub(crate) async fn cache_http_task(
        &self,
        session: &mut Session,
        task: &HttpTask,
        ctx: &mut SV::CTX,
        serve_from_cache: &mut ServeFromCache,
    ) -> Result<()>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        if !session.cache.enabled() && !session.cache.bypassing() {
            return Ok(());
        }

        match task {
            HttpTask::Header(header, end_stream) => {
                // decide if cacheable and create cache meta
                // for now, skip 1xxs (should not affect response cache decisions)
                // However 101 is an exception because it is the final response header
                if header.status.is_informational()
                    && header.status != StatusCode::SWITCHING_PROTOCOLS
                {
                    return Ok(());
                }
                match self.inner.response_cache_filter(session, header, ctx)? {
                    Cacheable(meta) => {
                        let mut fill_cache = true;
                        if session.cache.bypassing() {
                            // The cache might have been bypassed because the response exceeded the
                            // maximum cacheable asset size. If that looks like the case (there
                            // is a maximum file size configured and we don't know the content
                            // length up front), attempting to re-enable the cache now would cause
                            // the request to fail when the chunked response exceeds the maximum
                            // file size again.
                            if session.cache.max_file_size_bytes().is_some()
                                && !meta.headers().contains_key(header::CONTENT_LENGTH)
                            {
                                session
                                    .cache
                                    .disable(NoCacheReason::PredictedResponseTooLarge);
                                return Ok(());
                            }

                            session.cache.response_became_cacheable();

                            if session.req_header().method == Method::GET
                                && meta.response_header().status == StatusCode::OK
                            {
                                self.inner.cache_miss(session, ctx);
                                if !session.cache.enabled() {
                                    fill_cache = false;
                                }
                            } else {
                                // we've allowed caching on the next request,
                                // but do not cache _this_ request if bypassed and not 200
                                // (We didn't run upstream request cache filters to strip range or condition headers,
                                // so this could be an uncacheable response e.g. 206 or 304 or HEAD.
                                // Exclude all non-200/GET for simplicity, may expand allowable codes in the future.)
                                fill_cache = false;
                                session.cache.disable(NoCacheReason::Deferred);
                            }
                        }

                        // If the Content-Length is known, and a maximum asset size has been configured
                        // on the cache, validate that the response does not exceed the maximum asset size.
                        if session.cache.enabled() {
                            if let Some(max_file_size) = session.cache.max_file_size_bytes() {
                                let content_length_hdr = meta.headers().get(header::CONTENT_LENGTH);
                                if let Some(content_length) =
                                    header_value_content_length(content_length_hdr)
                                {
                                    if content_length > max_file_size {
                                        fill_cache = false;
                                        session.cache.response_became_uncacheable(
                                            NoCacheReason::ResponseTooLarge,
                                        );
                                        session.cache.disable(NoCacheReason::ResponseTooLarge);
                                        // too large to cache, disable ranging
                                        session.ignore_downstream_range = true;
                                    }
                                }
                                // if the content-length header is not specified, the miss handler
                                // will count the response size on the fly, aborting the request
                                // mid-transfer if the max file size is exceeded
                            }
                        }
                        if fill_cache {
                            let req_header = session.req_header();
                            // Update the variance in the meta via the same callback,
                            // cache_vary_filter(), used in cache lookup for consistency.
                            // Future cache lookups need a matching variance in the meta
                            // with the cache key to pick up the correct variance
                            let variance = self.inner.cache_vary_filter(&meta, ctx, req_header);
                            session.cache.set_cache_meta(meta);
                            session.cache.update_variance(variance);
                            // this sends the meta and header
                            session.cache.set_miss_handler().await?;
                            if session.cache.miss_body_reader().is_some() {
                                serve_from_cache.enable_miss();
                            }
                            if *end_stream {
                                session
                                    .cache
                                    .miss_handler()
                                    .unwrap() // safe, it is set above
                                    .write_body(Bytes::new(), true)
                                    .await?;
                                session.cache.finish_miss_handler().await?;
                            }
                        }
                    }
                    Uncacheable(reason) => {
                        if !session.cache.bypassing() {
                            // mark as uncacheable, so we bypass cache next time
                            session.cache.response_became_uncacheable(reason);
                        }
                        session.cache.disable(reason);
                    }
                }
            }
            HttpTask::Body(data, end_stream) | HttpTask::UpgradedBody(data, end_stream) => {
                // It is not normally advisable to cache upgraded responses
                // e.g. they are essentially close-delimited, so they are easily truncated
                // but the framework still allows for it
                match data {
                    Some(d) => {
                        if session.cache.enabled() {
                            // TODO: do this async
                            // fail if writing the body would exceed the max_file_size_bytes
                            let body_size_allowed =
                                session.cache.track_body_bytes_for_max_file_size(d.len());
                            if !body_size_allowed {
                                debug!("chunked response exceeded max cache size, remembering that it is uncacheable");
                                session
                                    .cache
                                    .response_became_uncacheable(NoCacheReason::ResponseTooLarge);

                                return Error::e_explain(
                                    ERR_RESPONSE_TOO_LARGE,
                                    format!(
                                        "writing data of size {} bytes would exceed max file size of {} bytes",
                                        d.len(),
                                        session.cache.max_file_size_bytes().expect("max file size bytes must be set to exceed size")
                                    ),
                                );
                            }

                            // this will panic if more data is sent after we see end_stream
                            // but should be impossible in real world
                            let miss_handler = session.cache.miss_handler().unwrap();

                            miss_handler.write_body(d.clone(), *end_stream).await?;
                            if *end_stream {
                                session.cache.finish_miss_handler().await?;
                            }
                        }
                    }
                    None => {
                        if session.cache.enabled() && *end_stream {
                            session.cache.finish_miss_handler().await?;
                        }
                    }
                }
            }
            HttpTask::Trailer(_) => {} // h1 trailer is not supported yet
            HttpTask::Done => {
                if session.cache.enabled() {
                    session.cache.finish_miss_handler().await?;
                }
            }
            HttpTask::Failed(_) => {
                // TODO: handle this failure: delete the temp files?
            }
        }
        Ok(())
    }

    // Decide if local cache can be used according to upstream http header
    // 1. when upstream returns 304, the local cache is refreshed and served fresh
    // 2. when upstream returns certain HTTP error status, the local cache is served stale
    // Return true if local cache should be used, false otherwise
    pub(crate) async fn revalidate_or_stale(
        &self,
        session: &mut Session,
        task: &mut HttpTask,
        ctx: &mut SV::CTX,
    ) -> bool
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        if !session.cache.enabled() {
            return false;
        }

        match task {
            HttpTask::Header(resp, _eos) => {
                if resp.status == StatusCode::NOT_MODIFIED {
                    if session.cache.maybe_cache_meta().is_some() {
                        // run upstream response filters on upstream 304 first
                        if let Err(err) = self
                            .inner
                            .upstream_response_filter(session, resp, ctx)
                            .await
                        {
                            error!("upstream response filter error on 304: {err:?}");
                            session.cache.revalidate_uncacheable(
                                *resp.clone(),
                                NoCacheReason::InternalError,
                            );
                            // always serve from cache after receiving the 304
                            return true;
                        }
                        // 304 doesn't contain all the headers, merge 304 into cached 200 header
                        // in order for response_cache_filter to run correctly
                        let merged_header = session.cache.revalidate_merge_header(resp);
                        match self
                            .inner
                            .response_cache_filter(session, &merged_header, ctx)
                        {
                            Ok(Cacheable(mut meta)) => {
                                // For simplicity, ignore changes to variance over 304 for now.
                                // Note this means upstream can only update variance via 2xx
                                // (expired response).
                                //
                                // TODO: if we choose to respect changing Vary / variance over 304,
                                // then there are a few cases to consider. See `update_variance` in
                                // the `pingora-cache` module.
                                let old_meta = session.cache.maybe_cache_meta().unwrap(); // safe, checked above
                                if let Some(old_variance) = old_meta.variance() {
                                    meta.set_variance(old_variance);
                                }
                                if let Err(e) = session.cache.revalidate_cache_meta(meta).await {
                                    // Fail open: we can continue use the revalidated response even
                                    // if the meta failed to write to storage
                                    warn!("revalidate_cache_meta failed {e:?}");
                                }
                            }
                            Ok(Uncacheable(reason)) => {
                                // This response was once cacheable, and upstream tells us it has not changed
                                // but now we decided it is uncacheable!
                                // RFC 9111: still allowed to reuse stored response this time because
                                // it was "successfully validated"
                                // https://www.rfc-editor.org/rfc/rfc9111#constructing.responses.from.caches
                                // Serve the response, but do not update cache

                                // We also want to avoid poisoning downstream's cache with an unsolicited 304
                                // if we did not receive a conditional request from downstream
                                // (downstream may have a different cacheability assessment and could cache the 304)

                                //TODO: log more
                                debug!("Uncacheable {reason:?} 304 received");
                                session.cache.response_became_uncacheable(reason);
                                session.cache.revalidate_uncacheable(merged_header, reason);
                            }
                            Err(e) => {
                                // Error during revalidation, similarly to the reasons above
                                // (avoid poisoning downstream cache with passthrough 304),
                                // allow serving the stored response without updating cache
                                warn!("Error {e:?} response_cache_filter during revalidation");
                                session.cache.revalidate_uncacheable(
                                    merged_header,
                                    NoCacheReason::InternalError,
                                );
                                // Assume the next 304 may succeed, so don't mark uncacheable
                            }
                        }
                        // always serve from cache after receiving the 304
                        true
                    } else {
                        //TODO: log more
                        warn!("304 received without cached asset, disable caching");
                        let reason = NoCacheReason::Custom("304 on miss");
                        session.cache.response_became_uncacheable(reason);
                        session.cache.disable(reason);
                        false
                    }
                } else if resp.status.is_server_error() {
                    // stale if error logic, 5xx only for now

                    // this is response header filter, response_written should always be None?
                    if !session.cache.can_serve_stale_error()
                        || session.response_written().is_some()
                    {
                        return false;
                    }

                    // create an error to encode the http status code
                    let http_status_error = Error::create(
                        ErrorType::HTTPStatus(resp.status.as_u16()),
                        ErrorSource::Upstream,
                        None,
                        None,
                    );
                    if self
                        .inner
                        .should_serve_stale(session, ctx, Some(&http_status_error))
                    {
                        // no more need to keep the write lock
                        session
                            .cache
                            .release_write_lock(NoCacheReason::UpstreamError);
                        true
                    } else {
                        false
                    }
                } else {
                    false // not 304, not stale if error status code
                }
            }
            _ => false, // not header
        }
    }

    // None: no staled asset is used, Some(_): staled asset is sent to downstream
    // bool: can the downstream connection be reused
    pub(crate) async fn handle_stale_if_error(
        &self,
        session: &mut Session,
        ctx: &mut SV::CTX,
        error: &Error,
    ) -> Option<(bool, Option<Box<Error>>)>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        // the caller might already checked this as an optimization
        if !session.cache.can_serve_stale_error() {
            return None;
        }

        // the error happen halfway through a regular response to downstream
        // can't resend the response
        if session.response_written().is_some() {
            return None;
        }

        // check error types
        if !self.inner.should_serve_stale(session, ctx, Some(error)) {
            return None;
        }

        // log the original error
        warn!(
            "Fail to proxy: {}, serving stale, {}",
            error,
            self.inner.request_summary(session, ctx)
        );

        // no more need to hang onto the cache lock
        session
            .cache
            .release_write_lock(NoCacheReason::UpstreamError);

        Some(self.proxy_cache_hit(session, ctx).await)
    }

    // helper function to check when to continue to retry lock (true) or give up (false)
    fn handle_lock_status(
        &self,
        session: &mut Session,
        ctx: &SV::CTX,
        lock_status: LockStatus,
    ) -> bool
    where
        SV: ProxyHttp,
    {
        debug!("cache unlocked {lock_status:?}");
        match lock_status {
            // should lookup the cached asset again
            LockStatus::Done => true,
            // should compete to be a new writer
            LockStatus::TransientError => true,
            // the request is uncacheable, go ahead to fetch from the origin
            LockStatus::GiveUp => {
                // TODO: It will be nice for the writer to propagate the real reason
                session.cache.disable(NoCacheReason::CacheLockGiveUp);
                // not cacheable, just go to the origin.
                false
            }
            // treat this the same as TransientError
            LockStatus::Dangling => {
                // software bug, but request can recover from this
                warn!(
                    "Dangling cache lock, {}",
                    self.inner.request_summary(session, ctx)
                );
                true
            }
            // If this reader has spent too long waiting on locks, let the request
            // through while disabling cache (to avoid amplifying disk writes).
            LockStatus::WaitTimeout => {
                warn!(
                    "Cache lock timeout, {}",
                    self.inner.request_summary(session, ctx)
                );
                session.cache.disable(NoCacheReason::CacheLockTimeout);
                // not cacheable, just go to the origin.
                false
            }
            // When a singular cache lock has been held for too long,
            // we should allow requests to recompete for the lock
            // to protect upstreams from load.
            LockStatus::AgeTimeout => true,
            // software bug, this status should be impossible to reach
            LockStatus::Waiting => panic!("impossible LockStatus::Waiting"),
        }
    }
}

fn cache_hit_header(cache: &HttpCache) -> Box<ResponseHeader> {
    let mut header = Box::new(cache.cache_meta().response_header_copy());
    // convert cache response

    // these status codes / method cannot have body, so no need to add chunked encoding
    let no_body = matches!(header.status.as_u16(), 204 | 304);

    // https://www.rfc-editor.org/rfc/rfc9111#section-4:
    // When a stored response is used to satisfy a request without validation, a cache
    // MUST generate an Age header field
    if !cache.upstream_used() {
        let age = cache.cache_meta().age().as_secs();
        header.insert_header(http::header::AGE, age).unwrap();
    }
    log::debug!("cache header: {header:?} {:?}", cache.phase());

    // currently storage cache is always considered an h1 upstream
    // (header-serde serializes as h1.0 or h1.1)
    // set this header to be h1.1
    header.set_version(Version::HTTP_11);

    /* Add chunked header to tell downstream to use chunked encoding
     * during the absent of content-length in h2 */
    if !no_body
        && !header.status.is_informational()
        && header.headers.get(http::header::CONTENT_LENGTH).is_none()
    {
        header
            .insert_header(http::header::TRANSFER_ENCODING, "chunked")
            .unwrap();
    }
    header
}

// https://datatracker.ietf.org/doc/html/rfc7233#section-3
pub mod range_filter {
    use super::*;
    use bytes::BytesMut;
    use http::header::*;
    use std::ops::Range;

    // parse bytes into usize, ignores specific error
    fn parse_number(input: &[u8]) -> Option<usize> {
        str::from_utf8(input).ok()?.parse().ok()
    }

    fn parse_range_header(
        range: &[u8],
        content_length: usize,
        max_multipart_ranges: Option<usize>,
    ) -> RangeType {
        use regex::Regex;

        // Match individual range parts, (e.g. "0-100", "-5", "1-")
        static RE_SINGLE_RANGE_PART: Lazy<Regex> =
            Lazy::new(|| Regex::new(r"(?i)^\s*(?P<start>\d*)-(?P<end>\d*)\s*$").unwrap());

        // Convert bytes to UTF-8 string
        let range_str = match str::from_utf8(range) {
            Ok(s) => s,
            Err(_) => return RangeType::None,
        };

        // Split into "bytes=" and the actual range(s)
        let mut parts = range_str.splitn(2, "=");

        // Check if it starts with "bytes="
        let prefix = parts.next();
        if !prefix.is_some_and(|s| s.eq_ignore_ascii_case("bytes")) {
            return RangeType::None;
        }

        let Some(ranges_str) = parts.next() else {
            // No ranges provided
            return RangeType::None;
        };

        // "bytes=" with an empty (or whitespace-only) range-set is syntactically a
        // range request with zero satisfiable range-specs, so return 416.
        if ranges_str.trim().is_empty() {
            return RangeType::Invalid;
        }

        // Get the actual range string (e.g."100-200,300-400")
        let mut range_count = 0;
        for _ in ranges_str.split(',') {
            range_count += 1;
            if let Some(max_ranges) = max_multipart_ranges {
                if range_count >= max_ranges {
                    // If we get more than max configured ranges, return None for now to save parsing time
                    return RangeType::None;
                }
            }
        }
        let mut ranges: Vec<Range<usize>> = Vec::with_capacity(range_count);

        // Process each range
        let mut last_range_end = 0;
        for part in ranges_str.split(',') {
            let captured = match RE_SINGLE_RANGE_PART.captures(part) {
                Some(c) => c,
                None => {
                    return RangeType::None;
                }
            };

            let maybe_start = captured
                .name("start")
                .and_then(|s| s.as_str().parse::<usize>().ok());
            let end = captured
                .name("end")
                .and_then(|s| s.as_str().parse::<usize>().ok());

            let range = if let Some(start) = maybe_start {
                if start >= content_length {
                    // Skip the invalid range
                    continue;
                }
                // open-ended range should end at the last byte
                // over sized end is allowed but ignored
                // range end is inclusive
                let end = std::cmp::min(end.unwrap_or(content_length - 1), content_length - 1) + 1;
                if end <= start {
                    // Skip the invalid range
                    continue;
                }
                start..end
            } else {
                // start is empty, this changes the meaning of the value of `end`
                // Now it means to read the last `end` bytes
                if let Some(end) = end {
                    if content_length >= end {
                        (content_length - end)..content_length
                    } else {
                        // over sized end is allowed but ignored
                        0..content_length
                    }
                } else {
                    // No start or end, skip the invalid range
                    continue;
                }
            };
            // For now we stick to non-overlapping, ascending ranges for simplicity
            // and parity with nginx
            if range.start < last_range_end {
                return RangeType::None;
            }
            last_range_end = range.end;
            ranges.push(range);
        }

        // Note for future: we can technically coalesce multiple ranges for multipart
        //
        // https://www.rfc-editor.org/rfc/rfc9110#section-17.15
        // "Servers ought to ignore, coalesce, or reject egregious range
        // requests, such as requests for more than two overlapping ranges or
        // for many small ranges in a single set, particularly when the ranges
        // are requested out of order for no apparent reason. Multipart range
        // requests are not designed to support random access."

        if ranges.is_empty() {
            // We got some ranges, processed them but none were valid
            RangeType::Invalid
        } else if ranges.len() == 1 {
            RangeType::Single(ranges[0].clone()) // Only 1 index
        } else {
            RangeType::Multi(MultiRangeInfo::new(ranges))
        }
    }
    #[test]
    fn test_parse_range() {
        assert_eq!(
            parse_range_header(b"bytes=0-1", 10, None),
            RangeType::new_single(0, 2)
        );
        assert_eq!(
            parse_range_header(b"bYTes=0-9", 10, None),
            RangeType::new_single(0, 10)
        );
        assert_eq!(
            parse_range_header(b"bytes=0-12", 10, None),
            RangeType::new_single(0, 10)
        );
        assert_eq!(
            parse_range_header(b"bytes=0-", 10, None),
            RangeType::new_single(0, 10)
        );
        assert_eq!(
            parse_range_header(b"bytes=2-1", 10, None),
            RangeType::Invalid
        );
        assert_eq!(
            parse_range_header(b"bytes=10-11", 10, None),
            RangeType::Invalid
        );
        assert_eq!(
            parse_range_header(b"bytes=-2", 10, None),
            RangeType::new_single(8, 10)
        );
        assert_eq!(
            parse_range_header(b"bytes=-12", 10, None),
            RangeType::new_single(0, 10)
        );
        assert_eq!(parse_range_header(b"bytes=-", 10, None), RangeType::Invalid);
        assert_eq!(parse_range_header(b"bytes=", 10, None), RangeType::Invalid);
        assert_eq!(
            parse_range_header(b"bytes=  ", 10, None),
            RangeType::Invalid
        );
    }

    // Add some tests for multi-range too
    #[test]
    fn test_parse_range_header_multi() {
        assert_eq!(
            parse_range_header(b"bytes=0-1,4-5", 10, None)
                .get_multirange_info()
                .expect("Should have multipart info for Multipart range request")
                .ranges,
            (vec![Range { start: 0, end: 2 }, Range { start: 4, end: 6 }])
        );
        // Last range is invalid because the content-length is too small
        assert_eq!(
            parse_range_header(b"bytEs=0-99,200-299,400-499", 320, None)
                .get_multirange_info()
                .expect("Should have multipart info for Multipart range request")
                .ranges,
            (vec![
                Range { start: 0, end: 100 },
                Range {
                    start: 200,
                    end: 300
                }
            ])
        );
        // Same as above but appropriate content length
        assert_eq!(
            parse_range_header(b"bytEs=0-99,200-299,400-499", 500, None)
                .get_multirange_info()
                .expect("Should have multipart info for Multipart range request")
                .ranges,
            vec![
                Range { start: 0, end: 100 },
                Range {
                    start: 200,
                    end: 300
                },
                Range {
                    start: 400,
                    end: 500
                },
            ]
        );
        // Looks like a range request but it is continuous, we decline to range
        assert_eq!(
            parse_range_header(b"bytes=0-,-2", 10, None),
            RangeType::None,
        );
        // Should not have multirange info set
        assert!(parse_range_header(b"bytes=0-,-2", 10, None)
            .get_multirange_info()
            .is_none());
        // Overlapping ranges, these ranges are currently declined
        assert_eq!(
            parse_range_header(b"bytes=0-3,2-5", 10, None),
            RangeType::None,
        );
        assert!(parse_range_header(b"bytes=0-3,2-5", 10, None)
            .get_multirange_info()
            .is_none());

        // Content length is 2, so only range is 0-2.
        assert_eq!(
            parse_range_header(b"bytes=0-5,10-", 2, None),
            RangeType::new_single(0, 2)
        );
        assert!(parse_range_header(b"bytes=0-5,10-", 2, None)
            .get_multirange_info()
            .is_none());

        // We should ignore the last incorrect range and return the other acceptable ranges
        assert_eq!(
            parse_range_header(b"bytes=0-5, 10-20, 30-18", 200, None)
                .get_multirange_info()
                .expect("Should have multipart info for Multipart range request")
                .ranges,
            vec![Range { start: 0, end: 6 }, Range { start: 10, end: 21 },]
        );
        // All invalid ranges
        assert_eq!(
            parse_range_header(b"bytes=5-0, 20-15, 30-25", 200, None),
            RangeType::Invalid
        );

        // Helper function to generate a large number of ranges for the next test
        fn generate_range_header(count: usize) -> Vec<u8> {
            let mut s = String::from("bytes=");
            for i in 0..count {
                let start = i * 4;
                let end = start + 1;
                if i > 0 {
                    s.push(',');
                }
                s.push_str(&start.to_string());
                s.push('-');
                s.push_str(&end.to_string());
            }
            s.into_bytes()
        }

        // Test 200 range limit for parsing.
        let ranges = generate_range_header(201);
        assert_eq!(
            parse_range_header(&ranges, 1000, Some(200)),
            RangeType::None
        )
    }

    // For Multipart Requests, we need to know the boundary, content length and type across
    // the headers and the body. So let us store this information as part of the range
    #[derive(Debug, Eq, PartialEq, Clone)]
    pub struct MultiRangeInfo {
        pub ranges: Vec<Range<usize>>,
        pub boundary: String,
        total_length: usize,
        content_type: Option<String>,
    }

    impl MultiRangeInfo {
        // Create a new MultiRangeInfo, when we just have the ranges
        pub fn new(ranges: Vec<Range<usize>>) -> Self {
            Self {
                ranges,
                // Directly create boundary string on initialization
                boundary: Self::generate_boundary(),
                total_length: 0,
                content_type: None,
            }
        }
        pub fn set_content_type(&mut self, content_type: String) {
            self.content_type = Some(content_type)
        }
        pub fn set_total_length(&mut self, total_length: usize) {
            self.total_length = total_length;
        }
        // Per [RFC 9110](https://www.rfc-editor.org/rfc/rfc9110.html#multipart.byteranges),
        // we need generate a boundary string for each body part.
        // Per [RFC 2046](https://www.rfc-editor.org/rfc/rfc2046#section-5.1.1), the boundary should be no longer than 70 characters
        // and it must not match the body content.
        fn generate_boundary() -> String {
            use rand::Rng;
            let mut rng: rand::prelude::ThreadRng = rand::thread_rng();
            format!("{:016x}", rng.gen::<u64>())
        }
        pub fn calculate_multipart_length(&self) -> usize {
            let mut total_length = 0;
            let content_type = self.content_type.as_ref();
            for range in self.ranges.clone() {
                // Each part should have
                // \r\n--boundary\r\n                         --> 4 + boundary.len() (16) + 2 = 20
                // Content-Type: original-content-type\r\n    --> 14 + content_type.len() + 2
                // Content-Range: bytes start-end/total\r\n   --> Variable +2
                // \r\n                                       --> 2
                // [data]                                     --> data.len()
                total_length += 4 + self.boundary.len() + 2;
                total_length += content_type.map_or(0, |ct| 14 + ct.len() + 2);
                total_length += format!(
                    "Content-Range: bytes {}-{}/{}",
                    range.start,
                    range.end - 1,
                    self.total_length
                )
                .len()
                    + 2;
                total_length += 2;
                total_length += range.end - range.start;
            }
            // Final boundary: "\r\n--<boundary>--\r\n"
            total_length += 4 + self.boundary.len() + 4;
            total_length
        }
    }
    #[derive(Debug, Eq, PartialEq, Clone)]
    pub enum RangeType {
        None,
        Single(Range<usize>),
        Multi(MultiRangeInfo),
        Invalid,
    }

    impl RangeType {
        // Helper functions for tests
        #[allow(dead_code)]
        fn new_single(start: usize, end: usize) -> Self {
            RangeType::Single(Range { start, end })
        }
        #[allow(dead_code)]
        pub fn new_multi(ranges: Vec<Range<usize>>) -> Self {
            RangeType::Multi(MultiRangeInfo::new(ranges))
        }
        #[allow(dead_code)]
        fn get_multirange_info(&self) -> Option<&MultiRangeInfo> {
            match self {
                RangeType::Multi(multi_range_info) => Some(multi_range_info),
                _ => None,
            }
        }
        #[allow(dead_code)]
        fn update_multirange_info(&mut self, content_length: usize, content_type: Option<String>) {
            if let RangeType::Multi(multipart_range_info) = self {
                multipart_range_info.content_type = content_type;
                multipart_range_info.set_total_length(content_length);
            }
        }
    }

    // Handles both single-range and multipart-range requests
    pub fn range_header_filter(
        req: &RequestHeader,
        resp: &mut ResponseHeader,
        max_multipart_ranges: Option<usize>,
    ) -> RangeType {
        // The Range header field is evaluated after evaluating the precondition
        // header fields defined in [RFC7232], and only if the result in absence
        // of the Range header field would be a 200 (OK) response
        if resp.status != StatusCode::OK {
            return RangeType::None;
        }

        // Content-Length is not required by RFC but it is what nginx does and easier to implement
        // with this header present.
        let Some(content_length_bytes) = resp.headers.get(CONTENT_LENGTH) else {
            return RangeType::None;
        };
        // bail on invalid content length
        let Some(content_length) = parse_number(content_length_bytes.as_bytes()) else {
            return RangeType::None;
        };

        // At this point the response is allowed to be served as ranges
        // TODO: we can also check Accept-Range header from resp. Nginx gives uses the option
        // see proxy_force_ranges

        fn request_range_type(
            req: &RequestHeader,
            resp: &ResponseHeader,
            content_length: usize,
            max_multipart_ranges: Option<usize>,
        ) -> RangeType {
            // "A server MUST ignore a Range header field received with a request method other than GET."
            if req.method != http::Method::GET && req.method != http::Method::HEAD {
                return RangeType::None;
            }

            let Some(range_header) = req.headers.get(RANGE) else {
                return RangeType::None;
            };

            // if-range wants to understand if the Last-Modified / ETag value matches exactly for use
            // with resumable downloads.
            // https://datatracker.ietf.org/doc/html/rfc9110#name-if-range
            // Note that the RFC wants strong validation, and suggests that
            // "A valid entity-tag can be distinguished from a valid HTTP-date
            // by examining the first three characters for a DQUOTE,"
            // but this current etag matching behavior most closely mirrors nginx.
            if let Some(if_range) = req.headers.get(IF_RANGE) {
                let ir = if_range.as_bytes();
                let matches = if ir.len() >= 2 && ir.last() == Some(&b'"') {
                    resp.headers.get(ETAG).is_some_and(|etag| etag == if_range)
                } else if let Some(last_modified) = resp.headers.get(LAST_MODIFIED) {
                    last_modified == if_range
                } else {
                    false
                };
                if !matches {
                    return RangeType::None;
                }
            }

            parse_range_header(
                range_header.as_bytes(),
                content_length,
                max_multipart_ranges,
            )
        }

        let mut range_type = request_range_type(req, resp, content_length, max_multipart_ranges);

        match &mut range_type {
            RangeType::None => {
                // At this point, the response is _eligible_ to be served in ranges
                // in the future, so add Accept-Ranges, mirroring nginx behavior
                resp.insert_header(&ACCEPT_RANGES, "bytes").unwrap();
            }
            RangeType::Single(r) => {
                // 206 response
                resp.set_status(StatusCode::PARTIAL_CONTENT).unwrap();
                resp.remove_header(&ACCEPT_RANGES);
                resp.insert_header(&CONTENT_LENGTH, r.end - r.start)
                    .unwrap();
                resp.insert_header(
                    &CONTENT_RANGE,
                    format!("bytes {}-{}/{content_length}", r.start, r.end - 1), // range end is inclusive
                )
                .unwrap()
            }

            RangeType::Multi(multi_range_info) => {
                let content_type = resp
                    .headers
                    .get(CONTENT_TYPE)
                    .and_then(|v| v.to_str().ok())
                    .unwrap_or("application/octet-stream");
                // Update multipart info
                multi_range_info.set_total_length(content_length);
                multi_range_info.set_content_type(content_type.to_string());

                let total_length = multi_range_info.calculate_multipart_length();

                resp.set_status(StatusCode::PARTIAL_CONTENT).unwrap();
                resp.remove_header(&ACCEPT_RANGES);
                resp.insert_header(CONTENT_LENGTH, total_length).unwrap();
                resp.insert_header(
                    CONTENT_TYPE,
                    format!(
                        "multipart/byteranges; boundary={}",
                        multi_range_info.boundary
                    ), // RFC 2046
                )
                .unwrap();
                resp.remove_header(&CONTENT_RANGE);
            }
            RangeType::Invalid => {
                // 416 response
                resp.set_status(StatusCode::RANGE_NOT_SATISFIABLE).unwrap();
                // empty body for simplicity
                resp.insert_header(&CONTENT_LENGTH, HeaderValue::from_static("0"))
                    .unwrap();
                resp.remove_header(&ACCEPT_RANGES);
                resp.remove_header(&CONTENT_TYPE);
                resp.remove_header(&CONTENT_ENCODING);
                resp.remove_header(&TRANSFER_ENCODING);
                resp.insert_header(&CONTENT_RANGE, format!("bytes */{content_length}"))
                    .unwrap()
            }
        }

        range_type
    }

    #[test]
    fn test_range_filter_single() {
        fn gen_req() -> RequestHeader {
            RequestHeader::build(http::Method::GET, b"/", Some(1)).unwrap()
        }
        fn gen_resp() -> ResponseHeader {
            let mut resp = ResponseHeader::build(200, Some(1)).unwrap();
            resp.append_header("Content-Length", "10").unwrap();
            resp
        }

        // no range
        let req = gen_req();
        let mut resp = gen_resp();
        assert_eq!(RangeType::None, range_header_filter(&req, &mut resp, None));
        assert_eq!(resp.status.as_u16(), 200);
        assert_eq!(
            resp.headers.get("accept-ranges").unwrap().as_bytes(),
            b"bytes"
        );

        // no range, try HEAD
        let mut req = gen_req();
        req.method = Method::HEAD;
        let mut resp = gen_resp();
        assert_eq!(RangeType::None, range_header_filter(&req, &mut resp, None));
        assert_eq!(resp.status.as_u16(), 200);
        assert_eq!(
            resp.headers.get("accept-ranges").unwrap().as_bytes(),
            b"bytes"
        );

        // regular range
        let mut req = gen_req();
        req.insert_header("Range", "bytes=0-1").unwrap();
        let mut resp = gen_resp();
        assert_eq!(
            RangeType::new_single(0, 2),
            range_header_filter(&req, &mut resp, None)
        );
        assert_eq!(resp.status.as_u16(), 206);
        assert_eq!(resp.headers.get("content-length").unwrap().as_bytes(), b"2");
        assert_eq!(
            resp.headers.get("content-range").unwrap().as_bytes(),
            b"bytes 0-1/10"
        );
        assert!(resp.headers.get("accept-ranges").is_none());

        // regular range, accept-ranges included
        let mut req = gen_req();
        req.insert_header("Range", "bytes=0-1").unwrap();
        let mut resp = gen_resp();
        resp.insert_header("Accept-Ranges", "bytes").unwrap();
        assert_eq!(
            RangeType::new_single(0, 2),
            range_header_filter(&req, &mut resp, None)
        );
        assert_eq!(resp.status.as_u16(), 206);
        assert_eq!(resp.headers.get("content-length").unwrap().as_bytes(), b"2");
        assert_eq!(
            resp.headers.get("content-range").unwrap().as_bytes(),
            b"bytes 0-1/10"
        );
        // accept-ranges stripped
        assert!(resp.headers.get("accept-ranges").is_none());

        // bad range
        let mut req = gen_req();
        req.insert_header("Range", "bytes=1-0").unwrap();
        let mut resp = gen_resp();
        resp.insert_header("Accept-Ranges", "bytes").unwrap();
        resp.insert_header("Content-Encoding", "gzip").unwrap();
        resp.insert_header("Transfer-Encoding", "chunked").unwrap();
        assert_eq!(
            RangeType::Invalid,
            range_header_filter(&req, &mut resp, None)
        );
        assert_eq!(resp.status.as_u16(), 416);
        assert_eq!(resp.headers.get("content-length").unwrap().as_bytes(), b"0");
        assert_eq!(
            resp.headers.get("content-range").unwrap().as_bytes(),
            b"bytes */10"
        );
        assert!(resp.headers.get("accept-ranges").is_none());
        assert!(resp.headers.get("content-encoding").is_none());
        assert!(resp.headers.get("transfer-encoding").is_none());
    }

    // Multipart Tests
    #[test]
    fn test_range_filter_multipart() {
        fn gen_req() -> RequestHeader {
            let mut req: RequestHeader =
                RequestHeader::build(http::Method::GET, b"/", Some(1)).unwrap();
            req.append_header("Range", "bytes=0-1,3-4,6-7").unwrap();
            req
        }
        fn gen_req_overlap_range() -> RequestHeader {
            let mut req: RequestHeader =
                RequestHeader::build(http::Method::GET, b"/", Some(1)).unwrap();
            req.append_header("Range", "bytes=0-3,2-5,7-8").unwrap();
            req
        }
        fn gen_resp() -> ResponseHeader {
            let mut resp = ResponseHeader::build(200, Some(1)).unwrap();
            resp.append_header("Content-Length", "10").unwrap();
            resp
        }

        // valid multipart range
        let req = gen_req();
        let mut resp = gen_resp();
        let result = range_header_filter(&req, &mut resp, None);
        let mut boundary_str = String::new();

        assert!(matches!(result, RangeType::Multi(_)));
        if let RangeType::Multi(multi_part_info) = result {
            assert_eq!(multi_part_info.ranges.len(), 3);
            assert_eq!(multi_part_info.ranges[0], Range { start: 0, end: 2 });
            assert_eq!(multi_part_info.ranges[1], Range { start: 3, end: 5 });
            assert_eq!(multi_part_info.ranges[2], Range { start: 6, end: 8 });
            // Verify that multipart info has been set
            assert!(multi_part_info.content_type.is_some());
            assert_eq!(multi_part_info.total_length, 10);
            assert!(!multi_part_info.boundary.is_empty());
            boundary_str = multi_part_info.boundary;
        }
        assert_eq!(resp.status.as_u16(), 206);
        // Verify that boundary is the same in header and in multipartinfo
        assert_eq!(
            resp.headers.get("content-type").unwrap().to_str().unwrap(),
            format!("multipart/byteranges; boundary={boundary_str}")
        );
        assert!(resp.headers.get("content_length").is_none());
        assert!(resp.headers.get("accept-ranges").is_none());

        // overlapping range, multipart range is declined
        let req = gen_req_overlap_range();
        let mut resp = gen_resp();
        let result = range_header_filter(&req, &mut resp, None);

        assert!(matches!(result, RangeType::None));
        assert_eq!(resp.status.as_u16(), 200);
        assert!(resp.headers.get("content-type").is_none());
        assert_eq!(
            resp.headers.get("accept-ranges").unwrap().as_bytes(),
            b"bytes"
        );

        // bad multipart range
        let mut req = gen_req();
        req.insert_header("Range", "bytes=1-0, 12-9, 50-40")
            .unwrap();
        let mut resp = gen_resp();
        resp.insert_header("Content-Encoding", "br").unwrap();
        resp.insert_header("Transfer-Encoding", "chunked").unwrap();
        let result = range_header_filter(&req, &mut resp, None);
        assert!(matches!(result, RangeType::Invalid));
        assert_eq!(resp.status.as_u16(), 416);
        assert!(resp.headers.get("accept-ranges").is_none());
        assert!(resp.headers.get("content-encoding").is_none());
        assert!(resp.headers.get("transfer-encoding").is_none());
    }

    #[test]
    fn test_if_range() {
        const DATE: &str = "Fri, 07 Jul 2023 22:03:29 GMT";
        const ETAG: &str = "\"1234\"";

        fn gen_req() -> RequestHeader {
            let mut req = RequestHeader::build(http::Method::GET, b"/", Some(1)).unwrap();
            req.append_header("Range", "bytes=0-1").unwrap();
            req
        }
        fn get_multipart_req() -> RequestHeader {
            let mut req = RequestHeader::build(http::Method::GET, b"/", Some(1)).unwrap();
            _ = req.append_header("Range", "bytes=0-1,3-4,6-7");
            req
        }
        fn gen_resp() -> ResponseHeader {
            let mut resp = ResponseHeader::build(200, Some(1)).unwrap();
            resp.append_header("Content-Length", "10").unwrap();
            resp.append_header("Last-Modified", DATE).unwrap();
            resp.append_header("ETag", ETAG).unwrap();
            resp
        }

        // matching Last-Modified date
        let mut req = gen_req();
        req.insert_header("If-Range", DATE).unwrap();
        let mut resp = gen_resp();
        assert_eq!(
            RangeType::new_single(0, 2),
            range_header_filter(&req, &mut resp, None)
        );

        // non-matching date
        let mut req = gen_req();
        req.insert_header("If-Range", "Fri, 07 Jul 2023 22:03:25 GMT")
            .unwrap();
        let mut resp = gen_resp();
        assert_eq!(RangeType::None, range_header_filter(&req, &mut resp, None));
        assert_eq!(resp.status.as_u16(), 200);
        assert_eq!(
            resp.headers.get("accept-ranges").unwrap().as_bytes(),
            b"bytes"
        );

        // match ETag
        let mut req = gen_req();
        req.insert_header("If-Range", ETAG).unwrap();
        let mut resp = gen_resp();
        assert_eq!(
            RangeType::new_single(0, 2),
            range_header_filter(&req, &mut resp, None)
        );
        assert_eq!(resp.status.as_u16(), 206);
        assert!(resp.headers.get("accept-ranges").is_none());

        // non-matching ETags do not result in range
        let mut req = gen_req();
        req.insert_header("If-Range", "\"4567\"").unwrap();
        let mut resp = gen_resp();
        assert_eq!(RangeType::None, range_header_filter(&req, &mut resp, None));
        assert_eq!(resp.status.as_u16(), 200);
        assert_eq!(
            resp.headers.get("accept-ranges").unwrap().as_bytes(),
            b"bytes"
        );

        let mut req = gen_req();
        req.insert_header("If-Range", "1234").unwrap();
        let mut resp = gen_resp();
        assert_eq!(RangeType::None, range_header_filter(&req, &mut resp, None));
        assert_eq!(resp.status.as_u16(), 200);
        assert_eq!(
            resp.headers.get("accept-ranges").unwrap().as_bytes(),
            b"bytes"
        );

        // multipart range with If-Range
        let mut req = get_multipart_req();
        req.insert_header("If-Range", DATE).unwrap();
        let mut resp = gen_resp();
        let result = range_header_filter(&req, &mut resp, None);
        assert!(matches!(result, RangeType::Multi(_)));
        assert_eq!(resp.status.as_u16(), 206);
        assert!(resp.headers.get("accept-ranges").is_none());

        // multipart with matching ETag
        let req = get_multipart_req();
        let mut resp = gen_resp();
        assert!(matches!(
            range_header_filter(&req, &mut resp, None),
            RangeType::Multi(_)
        ));

        // multipart with non-matching If-Range
        let mut req = get_multipart_req();
        req.insert_header("If-Range", "\"wrong\"").unwrap();
        let mut resp = gen_resp();
        assert_eq!(RangeType::None, range_header_filter(&req, &mut resp, None));
        assert_eq!(resp.status.as_u16(), 200);
        assert_eq!(
            resp.headers.get("accept-ranges").unwrap().as_bytes(),
            b"bytes"
        );
    }

    pub struct RangeBodyFilter {
        pub range: RangeType,
        current: usize,
        multipart_idx: Option<usize>,
        cache_multipart_idx: Option<usize>,
    }

    impl Default for RangeBodyFilter {
        fn default() -> Self {
            Self::new()
        }
    }

    impl RangeBodyFilter {
        pub fn new() -> Self {
            RangeBodyFilter {
                range: RangeType::None,
                current: 0,
                multipart_idx: None,
                cache_multipart_idx: None,
            }
        }

        pub fn new_range(range: RangeType) -> Self {
            RangeBodyFilter {
                multipart_idx: matches!(range, RangeType::Multi(_)).then_some(0),
                range,
                ..Default::default()
            }
        }

        pub fn is_multipart_range(&self) -> bool {
            matches!(self.range, RangeType::Multi(_))
        }

        /// Whether we should expect the cache body reader to seek again
        /// for a different range.
        pub fn should_cache_seek_again(&self) -> bool {
            match &self.range {
                RangeType::Multi(multipart_info) => self
                    .cache_multipart_idx
                    .is_some_and(|idx| idx != multipart_info.ranges.len() - 1),
                _ => false,
            }
        }

        /// Returns the next multipart range to seek for the cache body reader.
        pub fn next_cache_multipart_range(&mut self) -> Range<usize> {
            match &self.range {
                RangeType::Multi(multipart_info) => {
                    match self.cache_multipart_idx.as_mut() {
                        Some(v) => *v += 1,
                        None => self.cache_multipart_idx = Some(0),
                    }
                    let cache_multipart_idx = self.cache_multipart_idx.expect("set above");
                    let multipart_idx = self.multipart_idx.expect("must be set on multirange");
                    // NOTE: currently this assumes once we start seeking multipart from the hit
                    // handler, it will continue to return can_seek_multipart true.
                    assert_eq!(multipart_idx, cache_multipart_idx,
                        "cache multipart idx should match multipart idx, or there is a hit handler bug");
                    multipart_info.ranges[cache_multipart_idx].clone()
                }
                _ => panic!("tried to advance multipart idx on non-multipart range"),
            }
        }

        pub fn set_current_cursor(&mut self, current: usize) {
            self.current = current;
        }

        pub fn set(&mut self, range: RangeType) {
            self.multipart_idx = matches!(range, RangeType::Multi(_)).then_some(0);
            self.range = range;
        }

        // Emit final boundary footer for multipart requests
        pub fn finalize(&self, boundary: &String) -> Option<Bytes> {
            if let RangeType::Multi(_) = self.range {
                Some(Bytes::from(format!("\r\n--{boundary}--\r\n")))
            } else {
                None
            }
        }

        pub fn filter_body(&mut self, data: Option<Bytes>) -> Option<Bytes> {
            match &self.range {
                RangeType::None => data,
                RangeType::Invalid => None,
                RangeType::Single(r) => {
                    let current = self.current;
                    self.current += data.as_ref().map_or(0, |d| d.len());
                    data.and_then(|d| Self::filter_range_data(r.start, r.end, current, d))
                }

                RangeType::Multi(_) => {
                    let data = data?;
                    let current = self.current;
                    let data_len = data.len();
                    self.current += data_len;
                    self.filter_multi_range_body(data, current, data_len)
                }
            }
        }

        fn filter_range_data(
            start: usize,
            end: usize,
            current: usize,
            data: Bytes,
        ) -> Option<Bytes> {
            if current + data.len() < start || current >= end {
                // if the current data is out side the desired range, just drop the data
                None
            } else if current >= start && current + data.len() <= end {
                // all data is within the slice
                Some(data)
            } else {
                // data:  current........current+data.len()
                // range: start...........end
                let slice_start = start.saturating_sub(current);
                let slice_end = std::cmp::min(data.len(), end - current);
                Some(data.slice(slice_start..slice_end))
            }
        }

        // Returns the multipart header for a given range
        fn build_multipart_header(
            &self,
            range: &Range<usize>,
            boundary: &str,
            total_length: &usize,
            content_type: Option<&str>,
        ) -> Bytes {
            Bytes::from(format!(
                "\r\n--{}\r\n{}Content-Range: bytes {}-{}/{}\r\n\r\n",
                boundary,
                content_type.map_or(String::new(), |ct| format!("Content-Type: {ct}\r\n")),
                range.start,
                range.end - 1,
                total_length
            ))
        }

        // Return true if chunk includes the start of the given range
        fn current_chunk_includes_range_start(
            &self,
            range: &Range<usize>,
            current: usize,
            data_len: usize,
        ) -> bool {
            range.start >= current && range.start < current + data_len
        }

        // Return true if chunk includes the end of the given range
        fn current_chunk_includes_range_end(
            &self,
            range: &Range<usize>,
            current: usize,
            data_len: usize,
        ) -> bool {
            range.end > current && range.end <= current + data_len
        }

        fn filter_multi_range_body(
            &mut self,
            data: Bytes,
            current: usize,
            data_len: usize,
        ) -> Option<Bytes> {
            let mut result = BytesMut::new();

            let RangeType::Multi(multi_part_info) = &self.range else {
                return None;
            };

            let multipart_idx = self.multipart_idx.expect("must be set on multirange");
            let final_range = multi_part_info.ranges.last()?;

            let (_, remaining_ranges) = multi_part_info.ranges.as_slice().split_at(multipart_idx);
            // NOTE: current invariant is that the multipart info ranges are disjoint ascending
            // this code is invalid if this invariant is not upheld
            for range in remaining_ranges {
                if let Some(sliced) =
                    Self::filter_range_data(range.start, range.end, current, data.clone())
                {
                    if self.current_chunk_includes_range_start(range, current, data_len) {
                        result.extend_from_slice(&self.build_multipart_header(
                            range,
                            multi_part_info.boundary.as_ref(),
                            &multi_part_info.total_length,
                            multi_part_info.content_type.as_deref(),
                        ));
                    }
                    // Emit the actual data bytes
                    result.extend_from_slice(&sliced);
                    if self.current_chunk_includes_range_end(range, current, data_len) {
                        // If this was the last range, we should emit the final footer too
                        if range == final_range {
                            if let Some(final_chunk) = self.finalize(&multi_part_info.boundary) {
                                result.extend_from_slice(&final_chunk);
                            }
                        }
                        // done with this range
                        self.multipart_idx = Some(self.multipart_idx.expect("must be set") + 1);
                    }
                } else {
                    // no part of the data was within this range,
                    // so lower bound of this range (and remaining ranges) must be
                    // > current + data_len
                    break;
                }
            }
            if result.is_empty() {
                None
            } else {
                Some(result.freeze())
            }
        }
    }

    #[test]
    fn test_range_body_filter_single() {
        let mut body_filter = RangeBodyFilter::new_range(RangeType::None);
        assert_eq!(body_filter.filter_body(Some("123".into())).unwrap(), "123");

        let mut body_filter = RangeBodyFilter::new_range(RangeType::Invalid);
        assert!(body_filter.filter_body(Some("123".into())).is_none());

        let mut body_filter = RangeBodyFilter::new_range(RangeType::new_single(0, 1));
        assert_eq!(body_filter.filter_body(Some("012".into())).unwrap(), "0");
        assert!(body_filter.filter_body(Some("345".into())).is_none());

        let mut body_filter = RangeBodyFilter::new_range(RangeType::new_single(4, 6));
        assert!(body_filter.filter_body(Some("012".into())).is_none());
        assert_eq!(body_filter.filter_body(Some("345".into())).unwrap(), "45");
        assert!(body_filter.filter_body(Some("678".into())).is_none());

        let mut body_filter = RangeBodyFilter::new_range(RangeType::new_single(1, 7));
        assert_eq!(body_filter.filter_body(Some("012".into())).unwrap(), "12");
        assert_eq!(body_filter.filter_body(Some("345".into())).unwrap(), "345");
        assert_eq!(body_filter.filter_body(Some("678".into())).unwrap(), "6");
    }

    #[test]
    fn test_range_body_filter_multipart() {
        // Test #1 - Test multipart ranges from 1 chunk
        let data = Bytes::from("0123456789");
        let ranges = vec![0..3, 6..9];
        let content_length = data.len();
        let mut body_filter = RangeBodyFilter::new();
        body_filter.set(RangeType::new_multi(ranges.clone()));

        body_filter
            .range
            .update_multirange_info(content_length, None);

        let multi_range_info = body_filter
            .range
            .get_multirange_info()
            .cloned()
            .expect("Multipart Ranges should have MultiPartInfo struct");

        // Pass the whole body in one chunk
        let output = body_filter.filter_body(Some(data)).unwrap();
        let footer = body_filter.finalize(&multi_range_info.boundary).unwrap();

        // Convert to String so that we can inspect whole response
        let output_str = str::from_utf8(&output).unwrap();
        let final_boundary = str::from_utf8(&footer).unwrap();
        let boundary = &multi_range_info.boundary;

        // Check part headers
        for (i, range) in ranges.iter().enumerate() {
            let header = &format!(
                "--{}\r\nContent-Range: bytes {}-{}/{}\r\n\r\n",
                boundary,
                range.start,
                range.end - 1,
                content_length
            );
            assert!(
                output_str.contains(header),
                "Missing part header {} in multipart body",
                i
            );
            // Check body matches
            let expected_body = &"0123456789"[range.clone()];
            assert!(
                output_str.contains(expected_body),
                "Missing body {} for range {:?}",
                expected_body,
                range
            )
        }
        // Check the final boundary footer
        assert_eq!(final_boundary, format!("\r\n--{}--\r\n", boundary));

        // Test #2 - Test multipart ranges from multiple chunks
        let full_body = b"0123456789";
        let ranges = vec![0..2, 4..6, 8..9];
        let content_length = full_body.len();
        let content_type = "text/plain".to_string();
        let mut body_filter = RangeBodyFilter::new();
        body_filter.set(RangeType::new_multi(ranges.clone()));

        body_filter
            .range
            .update_multirange_info(content_length, Some(content_type.clone()));

        let multi_range_info = body_filter
            .range
            .get_multirange_info()
            .cloned()
            .expect("Multipart Ranges should have MultiPartInfo struct");

        // Split the body into 4 chunks
        let chunk1 = Bytes::from_static(b"012");
        let chunk2 = Bytes::from_static(b"345");
        let chunk3 = Bytes::from_static(b"678");
        let chunk4 = Bytes::from_static(b"9");

        let mut collected_bytes = BytesMut::new();
        for chunk in [chunk1, chunk2, chunk3, chunk4] {
            if let Some(filtered) = body_filter.filter_body(Some(chunk)) {
                collected_bytes.extend_from_slice(&filtered);
            }
        }
        if let Some(final_boundary) = body_filter.finalize(&multi_range_info.boundary) {
            collected_bytes.extend_from_slice(&final_boundary);
        }

        let output_str = str::from_utf8(&collected_bytes).unwrap();
        let boundary = multi_range_info.boundary;

        for (i, range) in ranges.iter().enumerate() {
            let header = &format!(
                "--{}\r\nContent-Type: {}\r\nContent-Range: bytes {}-{}/{}\r\n\r\n",
                boundary,
                content_type,
                range.start,
                range.end - 1,
                content_length
            );
            let expected_body = &full_body[range.clone()];
            let expected_output = format!("{}{}", header, str::from_utf8(expected_body).unwrap());

            assert!(
                output_str.contains(&expected_output),
                "Missing or malformed part {} in multipart body. \n Expected: \n{}\n Got: \n{}",
                i,
                expected_output,
                output_str
            )
        }

        assert!(
            output_str.ends_with(&format!("\r\n--{}--\r\n", boundary)),
            "Missing final boundary"
        );

        // Test #3 - Test multipart ranges from multiple chunks, with ranges spanning chunks
        let full_body = b"abcdefghijkl";
        let ranges = vec![2..7, 9..11];
        let content_length = full_body.len();
        let content_type = "application/octet-stream".to_string();
        let mut body_filter = RangeBodyFilter::new();
        body_filter.set(RangeType::new_multi(ranges.clone()));

        body_filter
            .range
            .update_multirange_info(content_length, Some(content_type.clone()));

        let multi_range_info = body_filter
            .range
            .clone()
            .get_multirange_info()
            .cloned()
            .expect("Multipart Ranges should have MultiPartInfo struct");

        // Split the body into 4 chunks
        let chunk1 = Bytes::from_static(b"abc");
        let chunk2 = Bytes::from_static(b"def");
        let chunk3 = Bytes::from_static(b"ghi");
        let chunk4 = Bytes::from_static(b"jkl");

        let mut collected_bytes = BytesMut::new();
        for chunk in [chunk1, chunk2, chunk3, chunk4] {
            if let Some(filtered) = body_filter.filter_body(Some(chunk)) {
                collected_bytes.extend_from_slice(&filtered);
            }
        }
        if let Some(final_boundary) = body_filter.finalize(&multi_range_info.boundary) {
            collected_bytes.extend_from_slice(&final_boundary);
        }

        let output_str = str::from_utf8(&collected_bytes).unwrap();
        let boundary = &multi_range_info.boundary;

        let header1 = &format!(
            "--{}\r\nContent-Type: {}\r\nContent-Range: bytes {}-{}/{}\r\n\r\n",
            boundary,
            content_type,
            ranges[0].start,
            ranges[0].end - 1,
            content_length
        );
        let header2 = &format!(
            "--{}\r\nContent-Type: {}\r\nContent-Range: bytes {}-{}/{}\r\n\r\n",
            boundary,
            content_type,
            ranges[1].start,
            ranges[1].end - 1,
            content_length
        );

        assert!(output_str.contains(header1));
        assert!(output_str.contains(header2));

        let expected_body_slices = ["cdefg", "jk"];

        assert!(
            output_str.contains(expected_body_slices[0]),
            "Missing expected sliced body {}",
            expected_body_slices[0]
        );

        assert!(
            output_str.contains(expected_body_slices[1]),
            "Missing expected sliced body {}",
            expected_body_slices[1]
        );

        assert!(
            output_str.ends_with(&format!("\r\n--{}--\r\n", boundary)),
            "Missing final boundary"
        );
    }
}

// a state machine for proxy logic to tell when to use cache in the case of
// miss/revalidation/error.
#[derive(Debug)]
pub(crate) enum ServeFromCache {
    // not using cache
    Off,
    // should serve cache header
    CacheHeader,
    // should serve cache header only
    CacheHeaderOnly,
    // should serve cache header only but upstream response should be admitted to cache
    CacheHeaderOnlyMiss,
    // should serve cache body with a bool to indicate if it has already called seek on the hit handler
    CacheBody(bool),
    // should serve cache header but upstream response should be admitted to cache
    // This is the starting state for misses, which go to CacheBodyMiss or
    // CacheHeaderOnlyMiss before ending at DoneMiss
    CacheHeaderMiss,
    // should serve cache body but upstream response should be admitted to cache, bool to indicate seek status
    CacheBodyMiss(bool),
    // done serving cache body
    Done,
    // done serving cache body, but upstream response should continue to be admitted to cache
    DoneMiss,
}

impl ServeFromCache {
    pub fn new() -> Self {
        Self::Off
    }

    pub fn is_on(&self) -> bool {
        !matches!(self, Self::Off)
    }

    pub fn is_miss(&self) -> bool {
        matches!(
            self,
            Self::CacheHeaderMiss
                | Self::CacheHeaderOnlyMiss
                | Self::CacheBodyMiss(_)
                | Self::DoneMiss
        )
    }

    pub fn is_miss_header(&self) -> bool {
        // NOTE: this check is for checking if miss was just enabled, so it is excluding
        // HeaderOnlyMiss
        matches!(self, Self::CacheHeaderMiss)
    }

    pub fn is_miss_body(&self) -> bool {
        matches!(self, Self::CacheBodyMiss(_))
    }

    pub fn should_discard_upstream(&self) -> bool {
        self.is_on() && !self.is_miss()
    }

    pub fn should_send_to_downstream(&self) -> bool {
        !self.is_on()
    }

    pub fn enable(&mut self) {
        *self = Self::CacheHeader;
    }

    pub fn enable_miss(&mut self) {
        if !self.is_on() {
            *self = Self::CacheHeaderMiss;
        }
    }

    pub fn enable_header_only(&mut self) {
        match self {
            Self::CacheBody(_) => *self = Self::Done, // TODO: make sure no body is read yet
            Self::CacheBodyMiss(_) => *self = Self::DoneMiss,
            _ => {
                if self.is_miss() {
                    *self = Self::CacheHeaderOnlyMiss;
                } else {
                    *self = Self::CacheHeaderOnly;
                }
            }
        }
    }

    // This function is (best effort) cancel-safe to be used in select
    pub async fn next_http_task(
        &mut self,
        cache: &mut HttpCache,
        range: &mut RangeBodyFilter,
        upgraded: bool,
    ) -> Result<HttpTask> {
        fn body_task(data: Bytes, upgraded: bool) -> HttpTask {
            if upgraded {
                HttpTask::UpgradedBody(Some(data), false)
            } else {
                HttpTask::Body(Some(data), false)
            }
        }

        if !cache.enabled() {
            // Cache is disabled due to internal error
            // TODO: if nothing is sent to eyeball yet, figure out a way to recovery by
            // fetching from upstream
            return Error::e_explain(InternalError, "Cache disabled");
        }
        match self {
            Self::Off => panic!("ProxyUseCache not enabled"),
            Self::CacheHeader => {
                *self = Self::CacheBody(true);
                Ok(HttpTask::Header(cache_hit_header(cache), false)) // false for now
            }
            Self::CacheHeaderMiss => {
                *self = Self::CacheBodyMiss(true);
                Ok(HttpTask::Header(cache_hit_header(cache), false)) // false for now
            }
            Self::CacheHeaderOnly => {
                *self = Self::Done;
                Ok(HttpTask::Header(cache_hit_header(cache), true))
            }
            Self::CacheHeaderOnlyMiss => {
                *self = Self::DoneMiss;
                Ok(HttpTask::Header(cache_hit_header(cache), true))
            }
            Self::CacheBody(should_seek) => {
                log::trace!("cache body should seek: {should_seek}");
                if *should_seek {
                    self.maybe_seek_hit_handler(cache, range)?;
                }
                loop {
                    if let Some(b) = cache.hit_handler().read_body().await? {
                        return Ok(body_task(b, upgraded));
                    }
                    // EOF from hit handler for body requested
                    // if multipart, then seek again
                    if range.should_cache_seek_again() {
                        self.maybe_seek_hit_handler(cache, range)?;
                    } else {
                        *self = Self::Done;
                        return Ok(HttpTask::Done);
                    }
                }
            }
            Self::CacheBodyMiss(should_seek) => {
                if *should_seek {
                    self.maybe_seek_miss_handler(cache, range)?;
                }
                // safety: caller of enable_miss() call it only if the async_body_reader exist
                loop {
                    if let Some(b) = cache.miss_body_reader().unwrap().read_body().await? {
                        return Ok(body_task(b, upgraded));
                    } else {
                        // EOF from hit handler for body requested
                        // if multipart, then seek again
                        if range.should_cache_seek_again() {
                            self.maybe_seek_miss_handler(cache, range)?;
                        } else {
                            *self = Self::DoneMiss;
                            return Ok(HttpTask::Done);
                        }
                    }
                }
            }
            Self::Done => Ok(HttpTask::Done),
            Self::DoneMiss => Ok(HttpTask::Done),
        }
    }

    fn maybe_seek_miss_handler(
        &mut self,
        cache: &mut HttpCache,
        range_filter: &mut RangeBodyFilter,
    ) -> Result<()> {
        match &range_filter.range {
            RangeType::Single(range) => {
                // safety: called only if the async_body_reader exists
                if cache.miss_body_reader().unwrap().can_seek() {
                    cache
                        .miss_body_reader()
                        // safety: called only if the async_body_reader exists
                        .unwrap()
                        .seek(range.start, Some(range.end))
                        .or_err(InternalError, "cannot seek miss handler")?;
                    // Because the miss body reader is seeking, we no longer need the
                    // RangeBodyFilter's help to return the requested byte range.
                    range_filter.range = RangeType::None;
                }
            }
            RangeType::Multi(_info) => {
                // safety: called only if the async_body_reader exists
                if cache.miss_body_reader().unwrap().can_seek_multipart() {
                    let range = range_filter.next_cache_multipart_range();
                    cache
                        .miss_body_reader()
                        .unwrap()
                        .seek_multipart(range.start, Some(range.end))
                        .or_err(InternalError, "cannot seek hit handler for multirange")?;
                    // we still need RangeBodyFilter's help to transform the byte
                    // range into a multipart response.
                    range_filter.set_current_cursor(range.start);
                }
            }
            _ => {}
        }

        *self = Self::CacheBodyMiss(false);
        Ok(())
    }

    fn maybe_seek_hit_handler(
        &mut self,
        cache: &mut HttpCache,
        range_filter: &mut RangeBodyFilter,
    ) -> Result<()> {
        match &range_filter.range {
            RangeType::Single(range) => {
                if cache.hit_handler().can_seek() {
                    cache
                        .hit_handler()
                        .seek(range.start, Some(range.end))
                        .or_err(InternalError, "cannot seek hit handler")?;
                    // Because the hit handler is seeking, we no longer need the
                    // RangeBodyFilter's help to return the requested byte range.
                    range_filter.range = RangeType::None;
                }
            }
            RangeType::Multi(_info) => {
                if cache.hit_handler().can_seek_multipart() {
                    let range = range_filter.next_cache_multipart_range();
                    cache
                        .hit_handler()
                        .seek_multipart(range.start, Some(range.end))
                        .or_err(InternalError, "cannot seek hit handler for multirange")?;
                    // we still need RangeBodyFilter's help to transform the byte
                    // range into a multipart response.
                    range_filter.set_current_cursor(range.start);
                }
            }
            _ => {}
        }
        *self = Self::CacheBody(false);
        Ok(())
    }
}


================================================
FILE: pingora-proxy/src/proxy_common.rs
================================================
/// Possible downstream states during request multiplexing
#[derive(Debug, Clone, Copy)]
pub(crate) enum DownstreamStateMachine {
    /// more request (body) to read
    Reading,
    /// no more data to read
    ReadingFinished,
    /// downstream is already errored or closed
    Errored,
}

#[allow(clippy::wrong_self_convention)]
impl DownstreamStateMachine {
    pub fn new(finished: bool) -> Self {
        if finished {
            Self::ReadingFinished
        } else {
            Self::Reading
        }
    }

    // Can call read() to read more data or wait on closing
    pub fn can_poll(&self) -> bool {
        !matches!(self, Self::Errored)
    }

    pub fn is_reading(&self) -> bool {
        matches!(self, Self::Reading)
    }

    pub fn is_done(&self) -> bool {
        !matches!(self, Self::Reading)
    }

    pub fn is_errored(&self) -> bool {
        matches!(self, Self::Errored)
    }

    /// Move the state machine to Finished state if `set` is true
    pub fn maybe_finished(&mut self, set: bool) {
        if set {
            *self = Self::ReadingFinished
        }
    }

    /// Reset if we should continue reading from the downstream again.
    /// Only used with upgraded connections when body mode changes.
    pub fn reset(&mut self) {
        *self = Self::Reading;
    }

    pub fn to_errored(&mut self) {
        *self = Self::Errored
    }
}

/// Possible upstream states during request multiplexing
#[derive(Debug, Clone, Copy)]
pub(crate) struct ResponseStateMachine {
    upstream_response_done: bool,
    cached_response_done: bool,
}

impl ResponseStateMachine {
    pub fn new() -> Self {
        ResponseStateMachine {
            upstream_response_done: false,
            cached_response_done: true, // no cached response by default
        }
    }

    pub fn is_done(&self) -> bool {
        self.upstream_response_done && self.cached_response_done
    }

    pub fn upstream_done(&self) -> bool {
        self.upstream_response_done
    }

    pub fn cached_done(&self) -> bool {
        self.cached_response_done
    }

    pub fn enable_cached_response(&mut self) {
        self.cached_response_done = false;
    }

    pub fn maybe_set_upstream_done(&mut self, done: bool) {
        if done {
            self.upstream_response_done = true;
        }
    }

    pub fn maybe_set_cache_done(&mut self, done: bool) {
        if done {
            self.cached_response_done = true;
        }
    }
}


================================================
FILE: pingora-proxy/src/proxy_custom.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use futures::StreamExt;
use pingora_core::{
    protocols::http::custom::{
        client::Session as CustomSession, is_informational_except_101, BodyWrite,
        CustomMessageWrite, CUSTOM_MESSAGE_QUEUE_SIZE,
    },
    ImmutStr,
};
use proxy_cache::{range_filter::RangeBodyFilter, ServeFromCache};
use proxy_common::{DownstreamStateMachine, ResponseStateMachine};
use tokio::sync::oneshot;

use super::*;

impl<SV, C> HttpProxy<SV, C>
where
    C: custom::Connector,
{
    /// Proxy to a custom protocol upstream.
    /// Returns (reuse_server, error)
    pub(crate) async fn proxy_to_custom_upstream(
        &self,
        session: &mut Session,
        client_session: &mut C::Session,
        reused: bool,
        peer: &HttpPeer,
        ctx: &mut SV::CTX,
    ) -> (bool, Option<Box<Error>>)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        #[cfg(windows)]
        let raw = client_session.fd() as std::os::windows::io::RawSocket;
        #[cfg(unix)]
        let raw = client_session.fd();

        if let Err(e) = self
            .inner
            .connected_to_upstream(session, reused, peer, raw, client_session.digest(), ctx)
            .await
        {
            return (false, Some(e));
        }

        let (server_session_reuse, error) = self
            .custom_proxy_down_to_up(session, client_session, peer, ctx)
            .await;

        // Parity with H1/H2: custom upstreams don't report payload bytes; record 0.
        session.set_upstream_body_bytes_received(0);

        (server_session_reuse, error)
    }

    /// Handle custom protocol proxying from downstream to upstream.
    /// Returns (reuse_server, error)
    async fn custom_proxy_down_to_up(
        &self,
        session: &mut Session,
        client_session: &mut C::Session,
        peer: &HttpPeer,
        ctx: &mut SV::CTX,
    ) -> (bool, Option<Box<Error>>)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        let mut req = session.req_header().clone();

        if session.cache.enabled() {
            pingora_cache::filters::upstream::request_filter(
                &mut req,
                session.cache.maybe_cache_meta(),
            );
            session.mark_upstream_headers_mutated_for_cache();
        }

        match self
            .inner
            .upstream_request_filter(session, &mut req, ctx)
            .await
        {
            Ok(_) => { /* continue */ }
            Err(e) => {
                return (false, Some(e));
            }
        }

        session.upstream_compression.request_filter(&req);
        let body_empty = session.as_mut().is_body_empty();

        debug!("Request to custom: {req:?}");

        let req = Box::new(req);
        if let Err(e) = client_session.write_request_header(req, body_empty).await {
            return (false, Some(e.into_up()));
        }

        client_session.set_read_timeout(peer.options.read_timeout);
        client_session.set_write_timeout(peer.options.write_timeout);

        // take the body writer out of the client for easy duplex
        let mut client_body = client_session
            .take_request_body_writer()
            .expect("already send request header");

        let (tx, rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        session.as_mut().enable_retry_buffering();

        // Custom message logic

        let Some(mut upstream_custom_message_reader) = client_session.take_custom_message_reader()
        else {
            return (
                false,
                Some(Error::explain(
                    ReadError,
                    "can't extract custom reader from upstream",
                )),
            );
        };

        let Some(mut upstream_custom_message_writer) = client_session.take_custom_message_writer()
        else {
            return (
                false,
                Some(Error::explain(
                    WriteError,
                    "custom upstream must have a custom message writer",
                )),
            );
        };

        // A channel to inject custom messages to upstream from server logic.
        let (upstream_custom_message_inject_tx, upstream_custom_message_inject_rx) =
            mpsc::channel(CUSTOM_MESSAGE_QUEUE_SIZE);

        // Downstream reader
        let mut downstream_custom_message_reader = match session.downstream_custom_message() {
            Ok(Some(rx)) => rx,
            Ok(None) => Box::new(futures::stream::empty::<Result<Bytes>>()),
            Err(err) => return (false, Some(err)),
        };

        // Downstream writer
        let (mut downstream_custom_message_writer, downstream_custom_final_hop): (
            Box<dyn CustomMessageWrite>,
            bool, // if this hop is final
        ) = if let Some(custom_session) = session.downstream_session.as_custom_mut() {
            (
                custom_session
                    .take_custom_message_writer()
                    .expect("custom downstream must have a custom message writer"),
                false,
            )
        } else {
            (Box::new(()), true)
        };

        // A channel to inject custom messages to downstream from server logic.
        let (downstream_custom_message_inject_tx, downstream_custom_message_inject_rx) =
            mpsc::channel(CUSTOM_MESSAGE_QUEUE_SIZE);

        // Filters for ProxyHttp trait
        let (upstream_custom_message_filter_tx, upstream_custom_message_filter_rx) =
            mpsc::channel(CUSTOM_MESSAGE_QUEUE_SIZE);
        let (downstream_custom_message_filter_tx, downstream_custom_message_filter_rx) =
            mpsc::channel(CUSTOM_MESSAGE_QUEUE_SIZE);

        // Cancellation channels for custom coroutines
        // The transmitters act as guards: when dropped, they signal the receivers to cancel.
        // `cancel_downstream_reader_tx` is held and later used to explicitly cancel.
        // `_cancel_upstream_reader_tx` is unused (prefixed with _) - it will be dropped at the
        // end of this scope, which automatically signals cancellation to the upstream reader.
        let (cancel_downstream_reader_tx, cancel_downstream_reader_rx) = oneshot::channel();
        let (_cancel_upstream_reader_tx, cancel_upstream_reader_rx) = oneshot::channel();

        let upstream_custom_message_forwarder = CustomMessageForwarder {
            ctx: "down_to_up".into(),
            reader: &mut downstream_custom_message_reader,
            writer: &mut upstream_custom_message_writer,
            filter: upstream_custom_message_filter_tx,
            inject: upstream_custom_message_inject_rx,
            cancel: cancel_downstream_reader_rx,
        };

        let downstream_custom_message_forwarder = CustomMessageForwarder {
            ctx: "up_to_down".into(),
            reader: &mut upstream_custom_message_reader,
            writer: &mut downstream_custom_message_writer,
            filter: downstream_custom_message_filter_tx,
            inject: downstream_custom_message_inject_rx,
            cancel: cancel_upstream_reader_rx,
        };

        if let Err(e) = self
            .inner
            .custom_forwarding(
                session,
                ctx,
                Some(upstream_custom_message_inject_tx),
                downstream_custom_message_inject_tx,
            )
            .await
        {
            return (false, Some(e));
        }

        /* read downstream body and upstream response at the same time */
        let ret = tokio::try_join!(
            self.custom_bidirection_down_to_up(
                session,
                &mut client_body,
                rx,
                ctx,
                upstream_custom_message_filter_rx,
                downstream_custom_message_filter_rx,
                downstream_custom_final_hop,
                cancel_downstream_reader_tx,
            ),
            custom_pipe_up_to_down_response(client_session, tx),
            upstream_custom_message_forwarder.proxy(),
            downstream_custom_message_forwarder.proxy(),
        );

        if let Some(custom_session) = session.downstream_session.as_custom_mut() {
            custom_session
                .restore_custom_message_writer(downstream_custom_message_writer)
                .expect("downstream restore_custom_message_writer should be empty");

            custom_session
                .restore_custom_message_reader(downstream_custom_message_reader)
                .expect("downstream restore_custom_message_reader should be empty");
        }

        match ret {
            Ok((downstream_can_reuse, _upstream, _custom_up_down, _custom_down_up)) => {
                (downstream_can_reuse, None)
            }
            Err(e) => (false, Some(e)),
        }
    }

    // returns whether server (downstream) session can be reused
    #[allow(clippy::too_many_arguments)]
    async fn custom_bidirection_down_to_up(
        &self,
        session: &mut Session,
        client_body: &mut Box<dyn BodyWrite>,
        mut rx: mpsc::Receiver<HttpTask>,
        ctx: &mut SV::CTX,
        mut upstream_custom_message_filter_rx: mpsc::Receiver<(
            Bytes,
            oneshot::Sender<Option<Bytes>>,
        )>,
        mut downstream_custom_message_filter_rx: mpsc::Receiver<(
            Bytes,
            oneshot::Sender<Option<Bytes>>,
        )>,
        downstream_custom_final_hop: bool,
        cancel_downstream_reader_tx: oneshot::Sender<()>,
    ) -> Result<bool>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        let mut cancel_downstream_reader_tx = Some(cancel_downstream_reader_tx);

        let mut downstream_state = DownstreamStateMachine::new(session.as_mut().is_body_done());

        // retry, send buffer if it exists
        if let Some(buffer) = session.as_mut().get_retry_buffer() {
            self.send_body_to_custom(
                session,
                Some(buffer),
                downstream_state.is_done(),
                client_body,
                ctx,
            )
            .await?;
        }

        let mut response_state = ResponseStateMachine::new();

        // these two below can be wrapped into an internal ctx
        // use cache when upstream revalidates (or TODO: error)
        let mut serve_from_cache = ServeFromCache::new();
        let mut range_body_filter = proxy_cache::range_filter::RangeBodyFilter::new();

        let mut upstream_custom = true;
        let mut downstream_custom = true;

        /* duplex mode
         * see the Same function for h1 for more comments
         */
        while !downstream_state.is_done()
            || !response_state.is_done()
            || upstream_custom
            || downstream_custom
        {
            // partial read support, this check will also be false if cache is disabled.
            let support_cache_partial_read =
                session.cache.support_streaming_partial_write() == Some(true);
            let upgraded = session.was_upgraded();

            tokio::select! {
                body = session.downstream_session.read_body_or_idle(downstream_state.is_done()), if downstream_state.can_poll() => {
                    let body = match body {
                        Ok(b) => b,
                        Err(e) => {
                            let wait_for_cache_fill = (!serve_from_cache.is_on() && support_cache_partial_read)
                                || serve_from_cache.is_miss();
                            if wait_for_cache_fill {
                                // ignore downstream error so that upstream can continue to write cache
                                downstream_state.to_errored();
                                warn!(
                                    "Downstream Error ignored during caching: {}, {}",
                                    e,
                                    self.inner.request_summary(session, ctx)
                                );
                                continue;
                           } else {
                                return Err(e.into_down());
                           }
                        }
                    };
                    let is_body_done = session.is_body_done();

                    match self.send_body_to_custom(session, body, is_body_done, client_body, ctx).await {
                        Ok(request_done) =>  {
                            downstream_state.maybe_finished(request_done);
                        },
                        Err(e) => {
                            // mark request done, attempt to drain receive
                            warn!("body send error: {e}");

                            // upstream is what actually errored but we don't want to continue
                            // polling the downstream body
                            downstream_state.to_errored();

                            // downstream still trying to send something, but the upstream is already stooped
                            // cancel the custom downstream to upstream coroutine, because the proxy will not see EOS.
                            let _ = cancel_downstream_reader_tx.take().expect("cancel must be set and called once").send(());
                        }
                    };
                },

                task = rx.recv(), if !response_state.upstream_done() => {
                    debug!("upstream event");

                    if let Some(t) = task {
                        debug!("upstream event custom: {:?}", t);
                        if serve_from_cache.should_discard_upstream() {
                            // just drain, do we need to do anything else?
                           continue;
                        }
                        // pull as many tasks as we can
                        let mut tasks = Vec::with_capacity(TASK_BUFFER_SIZE);
                        tasks.push(t);
                        while let Ok(task) = rx.try_recv() {
                            tasks.push(task);
                        }

                        /* run filters before sending to downstream */
                        let mut filtered_tasks = Vec::with_capacity(TASK_BUFFER_SIZE);
                        for mut t in tasks {
                            if self.revalidate_or_stale(session, &mut t, ctx).await {
                                serve_from_cache.enable();
                                response_state.enable_cached_response();
                                // skip downstream filtering entirely as the 304 will not be sent
                                break;
                            }
                            session.upstream_compression.response_filter(&mut t);
                            // check error and abort
                            // otherwise the error is surfaced via write_response_tasks()
                            if !serve_from_cache.should_send_to_downstream() {
                                if let HttpTask::Failed(e) = t {
                                    return Err(e);
                                }
                            }
                            filtered_tasks.push(
                                self.custom_response_filter(session, t, ctx,
                                    &mut serve_from_cache,
                                    &mut range_body_filter, false).await?);
                            if serve_from_cache.is_miss_header() {
                                response_state.enable_cached_response();
                            }
                        }

                        if !serve_from_cache.should_send_to_downstream() {
                            // TODO: need to derive response_done from filtered_tasks in case downstream failed already
                            continue;
                        }

                        let upgraded = session.was_upgraded();
                        let response_done = session.write_response_tasks(filtered_tasks).await?;
                        if !upgraded && session.was_upgraded() && downstream_state.can_poll() {
                            // just upgraded, the downstream state should be reset to continue to
                            // poll body
                            trace!("reset downstream state on upgrade");
                            downstream_state.reset();
                        }

                        response_state.maybe_set_upstream_done(response_done);
                    } else {
                        debug!("empty upstream event");
                        response_state.maybe_set_upstream_done(true);
                    }
                }

                task = serve_from_cache.next_http_task(&mut session.cache, &mut range_body_filter, upgraded),
                    if !response_state.cached_done() && !downstream_state.is_errored() && serve_from_cache.is_on() => {
                    let task = self.custom_response_filter(session, task?, ctx,
                        &mut serve_from_cache,
                        &mut range_body_filter, true).await?;
                    match session.write_response_tasks(vec![task]).await {
                        Ok(b) => response_state.maybe_set_cache_done(b),
                        Err(e) => if serve_from_cache.is_miss() {
                            // give up writing to downstream but wait for upstream cache write to finish
                            downstream_state.to_errored();
                            response_state.maybe_set_cache_done(true);
                            warn!(
                                "Downstream Error ignored during caching: {}, {}",
                                e,
                                self.inner.request_summary(session, ctx)
                            );
                            continue;
                        } else {
                            return Err(e);
                        }
                    }
                    if response_state.cached_done() {
                        if let Err(e) = session.cache.finish_hit_handler().await {
                            warn!("Error during finish_hit_handler: {}", e);
                        }
                    }
                }

                ret = upstream_custom_message_filter_rx.recv(), if upstream_custom => {
                    let Some(msg) = ret else {
                        debug!("upstream_custom_message_filter_rx: custom downstream to upstream exited on reading");
                        upstream_custom = false;
                        continue;
                    };

                    let (data, callback) = msg;

                    let new_msg = self.inner
                        .downstream_custom_message_proxy_filter(session, data, ctx, false)  // false because the upstream is custom
                        .await?;

                    if callback.send(new_msg).is_err() {
                        debug!("upstream_custom_message_incoming_rx: custom downstream to upstream exited on callback");
                        upstream_custom = false;
                        continue;
                    };
                },

                ret = downstream_custom_message_filter_rx.recv(), if downstream_custom => {
                    let Some(msg) = ret else {
                        debug!("downstream_custom_message_filter_rx: custom upstream to downstream exited on reading");
                        downstream_custom = false;
                        continue;
                    };

                    let (data, callback) = msg;

                    let new_msg = self.inner
                        .upstream_custom_message_proxy_filter(session, data, ctx, downstream_custom_final_hop)
                        .await?;

                    if callback.send(new_msg).is_err() {
                        debug!("downstream_custom_message_filter_rx: custom upstream to downstream exited on callback");
                        downstream_custom = false;
                        continue
                    };
                },

                else => {
                    break;
                }
            }
        }

        // Re-raise the error then the loop is finished.
        if downstream_state.is_errored() {
            let err = Error::e_explain(WriteError, "downstream_state is_errored");
            error!("custom_bidirection_down_to_up: downstream_state.is_errored",);
            return err;
        }

        client_body.cleanup().await?;

        let mut reuse_downstream = !downstream_state.is_errored();
        if reuse_downstream {
            match session.as_mut().finish_body().await {
                Ok(_) => {
                    debug!("finished sending body to downstream");
                }
                Err(e) => {
                    error!("Error finish sending body to downstream: {}", e);
                    reuse_downstream = false;
                }
            }
        }
        Ok(reuse_downstream)
    }

    async fn custom_response_filter(
        &self,
        session: &mut Session,
        mut task: HttpTask,
        ctx: &mut SV::CTX,
        serve_from_cache: &mut ServeFromCache,
        range_body_filter: &mut RangeBodyFilter,
        from_cache: bool, // are the task from cache already
    ) -> Result<HttpTask>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        if !from_cache {
            self.upstream_filter(session, &mut task, ctx).await?;

            // cache the original response before any downstream transformation
            // requests that bypassed cache still need to run filters to see if the response has become cacheable
            if session.cache.enabled() || session.cache.bypassing() {
                if let Err(e) = self
                    .cache_http_task(session, &task, ctx, serve_from_cache)
                    .await
                {
                    session.cache.disable(NoCacheReason::StorageError);
                    if serve_from_cache.is_miss_body() {
                        // if the response stream cache body during miss but write fails, it has to
                        // give up the entire request
                        return Err(e);
                    } else {
                        // otherwise, continue processing the response
                        warn!(
                            "Fail to cache response: {}, {}",
                            e,
                            self.inner.request_summary(session, ctx)
                        );
                    }
                }
            }
            // skip the downstream filtering if these tasks are just for cache admission
            if !serve_from_cache.should_send_to_downstream() {
                return Ok(task);
            }
        } // else: cached/local response, no need to trigger upstream filters and caching

        match task {
            HttpTask::Header(mut header, eos) => {
                /* Downstream revalidation, only needed when cache is on because otherwise origin
                 * will handle it */
                // TODO: if cache is disabled during response phase, we should still do the filter
                if session.cache.enabled() {
                    self.downstream_response_conditional_filter(
                        serve_from_cache,
                        session,
                        &mut header,
                        ctx,
                    );
                    if !session.ignore_downstream_range {
                        let range_type = self.inner.range_header_filter(session, &mut header, ctx);
                        range_body_filter.set(range_type);
                    }
                }

                self.inner
                    .response_filter(session, &mut header, ctx)
                    .await?;
                /* Downgrade the version so that write_response_header won't panic */
                header.set_version(Version::HTTP_11);

                // these status codes / method cannot have body, so no need to add chunked encoding
                let no_body = session.req_header().method == "HEAD"
                    || matches!(header.status.as_u16(), 204 | 304);

                /* Add chunked header to tell downstream to use chunked encoding
                 * during the absent of content-length */
                if !no_body
                    && !header.status.is_informational()
                    && header.headers.get(http::header::CONTENT_LENGTH).is_none()
                {
                    header.insert_header(http::header::TRANSFER_ENCODING, "chunked")?;
                }
                Ok(HttpTask::Header(header, eos))
            }
            HttpTask::Body(data, eos) => {
                let mut data = range_body_filter.filter_body(data);
                if let Some(duration) = self
                    .inner
                    .response_body_filter(session, &mut data, eos, ctx)?
                {
                    trace!("delaying response for {duration:?}");
                    time::sleep(duration).await;
                }
                Ok(HttpTask::Body(data, eos))
            }
            HttpTask::UpgradedBody(mut data, eos) => {
                // range body filter doesn't apply to upgraded body
                if let Some(duration) = self
                    .inner
                    .response_body_filter(session, &mut data, eos, ctx)?
                {
                    trace!("delaying upgraded response for {duration:?}");
                    time::sleep(duration).await;
                }
                Ok(HttpTask::UpgradedBody(data, eos))
            }
            HttpTask::Trailer(mut trailers) => {
                let trailer_buffer = match trailers.as_mut() {
                    Some(trailers) => {
                        debug!("Parsing response trailers..");
                        match self
                            .inner
                            .response_trailer_filter(session, trailers, ctx)
                            .await
                        {
                            Ok(buf) => buf,
                            Err(e) => {
                                error!(
                                    "Encountered error while filtering upstream trailers {:?}",
                                    e
                                );
                                None
                            }
                        }
                    }
                    _ => None,
                };
                // if we have a trailer buffer write it to the downstream response body
                if let Some(buffer) = trailer_buffer {
                    // write_body will not write additional bytes after reaching the content-length
                    // for gRPC H2 -> H1 this is not a problem but may be a problem for non gRPC code
                    // https://http2.github.io/http2-spec/#malformed
                    Ok(HttpTask::Body(Some(buffer), true))
                } else {
                    Ok(HttpTask::Trailer(trailers))
                }
            }
            HttpTask::Done => Ok(task),
            HttpTask::Failed(_) => Ok(task), // Do nothing just pass the error down
        }
    }

    async fn send_body_to_custom(
        &self,
        session: &mut Session,
        mut data: Option<Bytes>,
        end_of_body: bool,
        client_body: &mut Box<dyn BodyWrite>,
        ctx: &mut SV::CTX,
    ) -> Result<bool>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        session
            .downstream_modules_ctx
            .request_body_filter(&mut data, end_of_body)
            .await?;

        self.inner
            .request_body_filter(session, &mut data, end_of_body, ctx)
            .await?;

        if session.was_upgraded() {
            client_body.upgrade_body_writer();
        }

        /* it is normal to get 0 bytes because of multi-chunk parsing or request_body_filter.
         * Although there is no harm writing empty byte to custom, unlike h1, we ignore it
         * for consistency */
        if !end_of_body && data.as_ref().is_some_and(|d| d.is_empty()) {
            return Ok(false);
        }

        if let Some(mut data) = data {
            client_body
                .write_all_buf(&mut data)
                .await
                .map_err(|e| e.into_up())?;
            if end_of_body {
                client_body.finish().await.map_err(|e| e.into_up())?;
            }
        } else {
            debug!("Read downstream body done");
            client_body
                .finish()
                .await
                .map_err(|e| {
                    Error::because(WriteError, "while shutdown send data stream on no data", e)
                })
                .map_err(|e| e.into_up())?;
        }

        Ok(end_of_body)
    }
}

/* Read response header, body and trailer from custom upstream and send them to tx */
async fn custom_pipe_up_to_down_response<S: CustomSession>(
    client: &mut S,
    tx: mpsc::Sender<HttpTask>,
) -> Result<()> {
    let mut is_informational = true;
    while is_informational {
        client
            .read_response_header()
            .await
            .map_err(|e| e.into_up())?;
        let resp_header = Box::new(client.response_header().expect("just read").clone());
        // `101 Switching Protocols` is a response to the http1 Upgrade header and it's final response.
        // The WebSocket Protocol https://datatracker.ietf.org/doc/html/rfc6455
        is_informational = is_informational_except_101(resp_header.status.as_u16() as u32);

        match client.check_response_end_or_error(true).await {
            Ok(eos) => {
                tx.send(HttpTask::Header(resp_header, eos))
                    .await
                    .or_err(InternalError, "sending custom headers to pipe")?;
            }
            Err(e) => {
                // If upstream errored, then push error to downstream and then quit
                // Don't care if send fails (which means downstream already gone)
                // we were still able to retrieve the headers, so try sending
                let _ = tx.send(HttpTask::Header(resp_header, false)).await;
                let _ = tx.send(HttpTask::Failed(e.into_up())).await;
                return Ok(());
            }
        }
    }

    while let Some(chunk) = client
        .read_response_body()
        .await
        .map_err(|e| e.into_up())
        .transpose()
    {
        let data = match chunk {
            Ok(d) => d,
            Err(e) => {
                // Push the error to downstream and then quit
                let _ = tx.send(HttpTask::Failed(e.into_up())).await;
                // Downstream should consume all remaining data and handle the error
                return Ok(());
            }
        };

        match client.check_response_end_or_error(false).await {
            Ok(eos) => {
                let empty = data.is_empty();
                if empty && !eos {
                    /* it is normal to get 0 bytes because of multi-chunk
                     * don't write 0 bytes to downstream since it will be
                     * misread as the terminating chunk */
                    continue;
                }
                let body_task = if client.was_upgraded() {
                    HttpTask::UpgradedBody(Some(data), eos)
                } else {
                    HttpTask::Body(Some(data), eos)
                };
                let sent = tx
                    .send(body_task)
                    .await
                    .or_err(InternalError, "sending custom body to pipe");
                // If the if the response with content-length is sent to an HTTP1 downstream,
                // custom_bidirection_down_to_up() could decide that the body has finished and exit without
                // waiting for this function to signal the eos. In this case tx being closed is not
                // an sign of error. It should happen if the only thing left for the custom to send is
                // an empty data frame with eos set.
                if sent.is_err() && eos && empty {
                    return Ok(());
                }
                sent?;
            }
            Err(e) => {
                // Similar to above, push the error to downstream and then quit
                let _ = tx.send(HttpTask::Failed(e.into_up())).await;
                return Ok(());
            }
        }
    }

    // attempt to get trailers
    let trailers = match client.read_trailers().await {
        Ok(t) => t,
        Err(e) => {
            // Similar to above, push the error to downstream and then quit
            let _ = tx.send(HttpTask::Failed(e.into_up())).await;
            return Ok(());
        }
    };

    let trailers = trailers.map(Box::new);

    if trailers.is_some() {
        tx.send(HttpTask::Trailer(trailers))
            .await
            .or_err(InternalError, "sending custom trailer to pipe")?;
    }

    tx.send(HttpTask::Done)
        .await
        .unwrap_or_else(|_| debug!("custom channel closed!"));

    Ok(())
}

struct CustomMessageForwarder<'a> {
    ctx: ImmutStr,
    writer: &'a mut Box<dyn CustomMessageWrite>,
    reader:
        &'a mut Box<dyn futures::Stream<Item = Result<Bytes, Box<Error>>> + Send + Sync + Unpin>,
    inject: mpsc::Receiver<Bytes>,
    filter: mpsc::Sender<(Bytes, oneshot::Sender<Option<Bytes>>)>,
    cancel: oneshot::Receiver<()>,
}

impl CustomMessageForwarder<'_> {
    async fn proxy(mut self) -> Result<()> {
        let forwarder = async {
            let mut injector_status = true;
            let mut reader_status = true;

            debug!("{}: CustomMessageForwarder: start", self.ctx);

            while injector_status || reader_status {
                let (data, proxied) = tokio::select! {
                    ret = self.inject.recv(), if injector_status => {
                        let Some(data) = ret else {
                            injector_status = false;
                            continue
                        };
                        (data, false)
                    },

                    ret = self.reader.next(), if reader_status  => {
                        let Some(data) = ret else {
                            reader_status = false;
                            continue
                        };

                        let data = match data {
                            Ok(data) => data,
                            Err(err) => {
                                reader_status = false;
                                warn!("{}: CustomMessageForwarder: reader returned err: {err:?}", self.ctx);
                                continue;
                            },
                        };
                        (data, true)
                    },
                };

                let (callback_tx, callback_rx) = oneshot::channel();

                // If data received from proxy send it to filter
                if proxied {
                    if self.filter.send((data, callback_tx)).await.is_err() {
                        debug!(
                            "{}: CustomMessageForwarder: filter receiver dropped",
                            self.ctx
                        );
                        return Error::e_explain(
                            WriteError,
                            "CustomMessageForwarder: main proxy thread exited on filter send",
                        );
                    };
                } else {
                    callback_tx
                        .send(Some(data))
                        .expect("sending from the same thread");
                }

                match callback_rx.await {
                    Ok(None) => continue, // message was filtered
                    Ok(Some(msg)) => {
                        self.writer.write_custom_message(msg).await?;
                    }
                    Err(err) => {
                        debug!(
                            "{}: CustomMessageForwarder: callback_rx return error: {err}",
                            self.ctx
                        );
                        return Error::e_because(
                            WriteError,
                            "CustomMessageForwarder: main proxy thread exited on callback_rx await",
                            err,
                        );
                    }
                };
            }

            debug!("{}: CustomMessageForwarder: exit loop", self.ctx);

            let ret = self.writer.finish_custom().await;
            if let Err(ref err) = ret {
                debug!(
                    "{}: CustomMessageForwarder: finish_custom return error: {err}",
                    self.ctx
                );
            };
            ret?;

            debug!(
                "{}: CustomMessageForwarder: exit loop successfully",
                self.ctx
            );

            Ok(())
        };

        tokio::select! {
            ret = &mut self.cancel => {
                debug!("{}: CustomMessageForwarder: canceled while waiting for new messages: {ret:?}", self.ctx);
                Ok(())
            },
            ret = forwarder => ret
        }
    }
}


================================================
FILE: pingora-proxy/src/proxy_h1.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use futures::future::OptionFuture;
use futures::StreamExt;

use super::*;
use crate::proxy_cache::{range_filter::RangeBodyFilter, ServeFromCache};
use crate::proxy_common::*;
use pingora_cache::CachePhase;
use pingora_core::protocols::http::custom::CUSTOM_MESSAGE_QUEUE_SIZE;

impl<SV, C> HttpProxy<SV, C>
where
    C: custom::Connector,
{
    pub(crate) async fn proxy_1to1(
        &self,
        session: &mut Session,
        client_session: &mut HttpSessionV1,
        peer: &HttpPeer,
        ctx: &mut SV::CTX,
    ) -> (bool, bool, Option<Box<Error>>)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        client_session.read_timeout = peer.options.read_timeout;
        client_session.write_timeout = peer.options.write_timeout;

        // phase 2 send to upstream

        let mut req = session.req_header().clone();

        // Convert HTTP2 headers to H1
        if req.version == Version::HTTP_2 {
            req.set_version(Version::HTTP_11);
            // if client has body but has no content length, add chunked encoding
            // https://datatracker.ietf.org/doc/html/rfc9112#name-message-body
            // "The presence of a message body in a request is signaled by a Content-Length or Transfer-Encoding header field."
            if !session.is_body_empty() && session.get_header(header::CONTENT_LENGTH).is_none() {
                req.insert_header(header::TRANSFER_ENCODING, "chunked")
                    .unwrap();
            }
            if session.get_header(header::HOST).is_none() {
                // H2 is required to set :authority, but no necessarily header
                // most H1 server expect host header, so convert
                let host = req.uri.authority().map_or("", |a| a.as_str()).to_owned();
                req.insert_header(header::HOST, host).unwrap();
            }
            // TODO: Add keepalive header for connection reuse, but this is not required per RFC
        }

        if session.cache.enabled() {
            pingora_cache::filters::upstream::request_filter(
                &mut req,
                session.cache.maybe_cache_meta(),
            );
            session.mark_upstream_headers_mutated_for_cache();
        }

        match self
            .inner
            .upstream_request_filter(session, &mut req, ctx)
            .await
        {
            Ok(_) => { /* continue */ }
            Err(e) => {
                return (false, true, Some(e));
            }
        }

        session.upstream_compression.request_filter(&req);

        debug!("Sending header to upstream {:?}", req);

        match client_session.write_request_header(Box::new(req)).await {
            Ok(_) => { /* Continue */ }
            Err(e) => {
                return (false, false, Some(e.into_up()));
            }
        }

        let mut downstream_custom_message_writer = session
            .downstream_session
            .as_custom_mut()
            .and_then(|c| c.take_custom_message_writer());

        let (tx_upstream, rx_upstream) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);
        let (tx_downstream, rx_downstream) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        session.as_mut().enable_retry_buffering();

        // start bi-directional streaming
        let ret = tokio::try_join!(
            self.proxy_handle_downstream(
                session,
                tx_downstream,
                rx_upstream,
                ctx,
                &mut downstream_custom_message_writer
            ),
            self.proxy_handle_upstream(client_session, tx_upstream, rx_downstream),
        );

        if let Some(custom_session) = session.downstream_session.as_custom_mut() {
            if let Some(downstream_custom_message_writer) = downstream_custom_message_writer {
                match custom_session.restore_custom_message_writer(downstream_custom_message_writer)
                {
                    Ok(_) => { /* continue */ }
                    Err(e) => {
                        return (false, false, Some(e));
                    }
                }
            }
        }

        match ret {
            Ok((downstream_can_reuse, _upstream)) => (downstream_can_reuse, true, None),
            Err(e) => (false, false, Some(e)),
        }
    }

    pub(crate) async fn proxy_to_h1_upstream(
        &self,
        session: &mut Session,
        client_session: &mut HttpSessionV1,
        reused: bool,
        peer: &HttpPeer,
        ctx: &mut SV::CTX,
    ) -> (bool, bool, Option<Box<Error>>)
    // (reuse_server, reuse_client, error)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        #[cfg(windows)]
        let raw = client_session.id() as std::os::windows::io::RawSocket;
        #[cfg(unix)]
        let raw = client_session.id();

        let initial_write_pending = client_session.stream().get_write_pending_time();

        if let Err(e) = self
            .inner
            .connected_to_upstream(
                session,
                reused,
                peer,
                raw,
                Some(client_session.digest()),
                ctx,
            )
            .await
        {
            return (false, false, Some(e));
        }

        let (server_session_reuse, client_session_reuse, error) =
            self.proxy_1to1(session, client_session, peer, ctx).await;

        // Record upstream response body bytes received (payload only) for logging consumers.
        let upstream_bytes_total = client_session.body_bytes_received();
        session.set_upstream_body_bytes_received(upstream_bytes_total);

        // Record upstream write pending time for this session only (delta from baseline).
        let current_write_pending = client_session.stream().get_write_pending_time();
        let upstream_write_pending = current_write_pending.saturating_sub(initial_write_pending);
        session.set_upstream_write_pending_time(upstream_write_pending);

        (server_session_reuse, client_session_reuse, error)
    }

    async fn proxy_handle_upstream(
        &self,
        client_session: &mut HttpSessionV1,
        tx: mpsc::Sender<HttpTask>,
        mut rx: mpsc::Receiver<HttpTask>,
    ) -> Result<()>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        let mut request_done = false;
        let mut response_done = false;
        let mut send_error = None;
        let mut upgraded = false;

        /* duplex mode, wait for either to complete */
        while !request_done || !response_done {
            tokio::select! {
                res = client_session.read_response_task(), if !response_done => {
                    match res {
                        Ok(task) => {
                            response_done = task.is_end();
                            if !upgraded && client_session.was_upgraded() {
                                // upgrade can only happen once
                                upgraded = true;
                                if send_error.is_none() {
                                    // continue receiving from downstream after body mode change
                                    request_done = false;
                                }
                            }
                            let type_str = task.type_str();
                            let result = tx.send(task)
                                .await.or_err_with(
                                    InternalError,
                                    || format!("Failed to send upstream task {type_str}{} to pipe",
                                        if response_done { " (end)" } else {""})
                                );
                            // If the request is upgraded, the downstream pipe can early exit
                            // when the downstream connection is closed.
                            // In that case, this function should ignore that the pipe is closed.
                            // So that this function could read the rest events from rx including
                            // the closure, then exit.
                            if result.is_err() && !client_session.was_upgraded() {
                                return result;
                            }
                        },
                        Err(e) => {
                            // Push the error to downstream and then quit
                            // Don't care if send fails: downstream already gone
                            let _ = tx.send(HttpTask::Failed(send_error.unwrap_or(e).into_up())).await;
                            // Downstream should consume all remaining data and handle the error
                            return Ok(())
                        }
                    }
                },

                body = rx.recv(), if !request_done => {
                    match send_body_to1(client_session, body).await {
                        Ok(send_done) => {
                            request_done = send_done;
                            // An upgraded request is terminated when either side is done
                            if request_done && client_session.was_upgraded() {
                                response_done = true;
                            }
                        },
                        Err(e) => {
                           warn!("send error, draining read buf: {e}");
                           request_done = true;

                           send_error = Some(e);
                           continue
                        }
                    }
                },

                else => {
                    // this shouldn't be reached as the while loop would already exit
                    break;
                }
            }
        }

        Ok(())
    }

    // todo use this function to replace bidirection_1to2()
    // returns whether this server (downstream) session can be reused
    async fn proxy_handle_downstream(
        &self,
        session: &mut Session,
        tx: mpsc::Sender<HttpTask>,
        mut rx: mpsc::Receiver<HttpTask>,
        ctx: &mut SV::CTX,
        downstream_custom_message_writer: &mut Option<Box<dyn CustomMessageWrite>>,
    ) -> Result<bool>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        // setup custom message forwarding, if downstream supports it
        let (
            mut downstream_custom_read,
            mut downstream_custom_write,
            downstream_custom_message_custom_forwarding,
            mut downstream_custom_message_inject_rx,
            mut downstream_custom_message_reader,
        ) = if downstream_custom_message_writer.is_some() {
            let reader = session.downstream_custom_message()?;
            let (inject_tx, inject_rx) = mpsc::channel::<Bytes>(CUSTOM_MESSAGE_QUEUE_SIZE);
            (true, true, Some(inject_tx), Some(inject_rx), reader)
        } else {
            (false, false, None, None, None)
        };

        if let Some(custom_forwarding) = downstream_custom_message_custom_forwarding {
            self.inner
                .custom_forwarding(session, ctx, None, custom_forwarding)
                .await?;
        }

        let mut downstream_state = DownstreamStateMachine::new(session.as_mut().is_body_done());

        let buffer = session.as_ref().get_retry_buffer();

        // retry, send buffer if it exists or body empty
        if buffer.is_some() || session.as_mut().is_body_empty() {
            let send_permit = tx
                .reserve()
                .await
                .or_err(InternalError, "reserving body pipe")?;
            self.send_body_to_pipe(
                session,
                buffer,
                downstream_state.is_done(),
                send_permit,
                ctx,
            )
            .await?;
        }

        let mut response_state = ResponseStateMachine::new();

        // these two below can be wrapped into an internal ctx
        // use cache when upstream revalidates (or TODO: error)
        let mut serve_from_cache = proxy_cache::ServeFromCache::new();
        let mut range_body_filter = proxy_cache::range_filter::RangeBodyFilter::new();

        /* duplex mode without caching
         * Read body from downstream while reading response from upstream
         * If response is done, only read body from downstream
         * If request is done, read response from upstream while idling downstream (to close quickly)
         * If both are done, quit the loop
         *
         * With caching + but without partial read support
         * Similar to above, cache admission write happen when the data is write to downstream
         *
         * With caching + partial read support
         * A. Read upstream response and write to cache
         * B. Read data from cache and send to downstream
         * If B fails (usually downstream close), continue A.
         * If A fails, exit with error.
         * If both are done, quit the loop
         * Usually there is no request body to read for cacheable request
         */
        while !downstream_state.is_done()
            || !response_state.is_done()
            || downstream_custom_read && !downstream_state.is_errored()
            || downstream_custom_write
        {
            // reserve tx capacity ahead to avoid deadlock, see below

            let send_permit = tx
                .try_reserve()
                .or_err(InternalError, "try_reserve() body pipe for upstream");

            // Use optional futures to allow using optional channels in select branches
            let custom_inject_rx_recv: OptionFuture<_> = downstream_custom_message_inject_rx
                .as_mut()
                .map(|rx| rx.recv())
                .into();
            let custom_reader_next: OptionFuture<_> = downstream_custom_message_reader
                .as_mut()
                .map(|reader| reader.next())
                .into();

            // partial read support, this check will also be false if cache is disabled.
            let support_cache_partial_read =
                session.cache.support_streaming_partial_write() == Some(true);
            let upgraded = session.was_upgraded();

            tokio::select! {
                // only try to send to pipe if there is capacity to avoid deadlock
                // Otherwise deadlock could happen if both upstream and downstream are blocked
                // on sending to their corresponding pipes which are both full.
                body = session.downstream_session.read_body_or_idle(downstream_state.is_done()),
                    if downstream_state.can_poll() && send_permit.is_ok() => {

                    debug!("downstream event");
                    let body = match body {
                        Ok(b) => b,
                        Err(e) => {
                            let wait_for_cache_fill = (!serve_from_cache.is_on() && support_cache_partial_read)
                                || serve_from_cache.is_miss();
                            if wait_for_cache_fill {
                                // ignore downstream error so that upstream can continue to write cache
                                downstream_state.to_errored();
                                warn!(
                                    "Downstream Error ignored during caching: {}, {}",
                                    e,
                                    self.inner.request_summary(session, ctx)
                                );
                                // This will not be treated as a final error, but we should signal to
                                // downstream session regardless
                                session.downstream_session.on_proxy_failure(e);
                                continue;
                           } else {
                                return Err(e.into_down());
                           }
                        }
                    };
                    // If the request is websocket, `None` body means the request is closed.
                    // Set the response to be done as well so that the request completes normally.
                    if body.is_none() && session.was_upgraded() {
                        response_state.maybe_set_upstream_done(true);
                    }
                    // TODO: consider just drain this if serve_from_cache is set
                    let is_body_done = session.is_body_done();
                    let request_done = self.send_body_to_pipe(
                        session,
                        body,
                        is_body_done,
                        send_permit.unwrap(), // safe because we checked is_ok()
                        ctx,
                    )
                    .await?;
                    downstream_state.maybe_finished(request_done);
                },

                _ = tx.reserve(), if downstream_state.is_reading() && send_permit.is_err() => {
                    // If tx is closed, the upstream has already finished its job.
                    downstream_state.maybe_finished(tx.is_closed());
                    debug!("waiting for permit {send_permit:?}, upstream closed {}", tx.is_closed());
                    /* No permit, wait on more capacity to avoid starving.
                     * Otherwise this select only blocks on rx, which might send no data
                     * before the entire body is uploaded.
                     * once more capacity arrives we just loop back
                     */
                },

                task = rx.recv(), if !response_state.upstream_done() => {
                    debug!("upstream event: {:?}", task);
                    if let Some(t) = task {
                        if serve_from_cache.should_discard_upstream() {
                            // just drain, do we need to do anything else?
                           continue;
                        }
                        // pull as many tasks as we can
                        let mut tasks = Vec::with_capacity(TASK_BUFFER_SIZE);
                        tasks.push(t);
                        // tokio::task::unconstrained because now_or_never may yield None when the future is ready
                        while let Some(maybe_task) = tokio::task::unconstrained(rx.recv()).now_or_never() {
                            debug!("upstream event now: {:?}", maybe_task);
                            if let Some(t) = maybe_task {
                                tasks.push(t);
                            } else {
                                break; // upstream closed
                            }
                        }

                        /* run filters before sending to downstream */
                        let mut filtered_tasks = Vec::with_capacity(TASK_BUFFER_SIZE);
                        for mut t in tasks {
                            if self.revalidate_or_stale(session, &mut t, ctx).await {
                                serve_from_cache.enable();
                                response_state.enable_cached_response();
                                // skip downstream filtering entirely as the 304 will not be sent
                                break;
                            }
                            session.upstream_compression.response_filter(&mut t);
                            let task = self.h1_response_filter(session, t, ctx,
                                &mut serve_from_cache,
                                &mut range_body_filter, false).await?;
                            if serve_from_cache.is_miss_header() {
                                response_state.enable_cached_response();
                            }
                            // check error and abort
                            // otherwise the error is surfaced via write_response_tasks()
                            if !serve_from_cache.should_send_to_downstream() {
                                if let HttpTask::Failed(e) = task {
                                    return Err(e);
                                }
                            }
                            filtered_tasks.push(task);
                        }

                        if !serve_from_cache.should_send_to_downstream() {
                            // TODO: need to derive response_done from filtered_tasks in case downstream failed already
                            continue;
                        }

                        // set to downstream
                        let upgraded = session.was_upgraded();
                        let response_done = session.write_response_tasks(filtered_tasks).await?;
                        if !upgraded && session.was_upgraded() && downstream_state.can_poll() {
                            // just upgraded, the downstream state should be reset to continue to
                            // poll body
                            trace!("reset downstream state on upgrade");
                            downstream_state.reset();
                        }
                        response_state.maybe_set_upstream_done(response_done);
                        // unsuccessful upgrade response (or end of upstream upgraded conn,
                        // which forces the body reader to complete) may force the request done
                        downstream_state.maybe_finished(session.is_body_done());
                    } else {
                        debug!("empty upstream event");
                        response_state.maybe_set_upstream_done(true);
                    }
                },

                task = serve_from_cache.next_http_task(&mut session.cache, &mut range_body_filter, upgraded),
                    if !response_state.cached_done() && !downstream_state.is_errored() && serve_from_cache.is_on() => {

                    let task = self.h1_response_filter(session, task?, ctx,
                        &mut serve_from_cache,
                        &mut range_body_filter, true).await?;
                    debug!("serve_from_cache task {task:?}");

                    match session.write_response_tasks(vec![task]).await {
                        Ok(b) => response_state.maybe_set_cache_done(b),
                        Err(e) => if serve_from_cache.is_miss() {
                            // give up writing to downstream but wait for upstream cache write to finish
                            downstream_state.to_errored();
                            response_state.maybe_set_cache_done(true);
                            warn!(
                                "Downstream Error ignored during caching: {}, {}",
                                e,
                                self.inner.request_summary(session, ctx)
                            );
                            // This will not be treated as a final error, but we should signal to
                            // downstream session regardless
                            session.downstream_session.on_proxy_failure(e);
                            continue;
                        } else {
                            return Err(e);
                        }
                    }
                    if response_state.cached_done() {
                        if let Err(e) = session.cache.finish_hit_handler().await {
                            warn!("Error during finish_hit_handler: {}", e);
                        }
                    }
                }

                data = custom_reader_next, if downstream_custom_read && !downstream_state.is_errored()  => {
                    let Some(data) = data.flatten() else {
                        downstream_custom_read = false;
                        continue;
                    };

                    let data = match data {
                        Ok(data) => data,
                        Err(err) =>  {
                            warn!("downstream_custom_message_reader got error: {err}");
                            downstream_custom_read = false;
                            continue;
                        },
                    };

                    self.inner
                        .downstream_custom_message_proxy_filter(session, data, ctx, true) // true, because it's the last hop for downstream proxying
                        .await?;
                },

                data = custom_inject_rx_recv, if downstream_custom_write => {
                    match data.flatten() {
                        Some(data) => {
                            if let Some(ref mut custom_writer) = downstream_custom_message_writer {
                                custom_writer.write_custom_message(data).await?
                            }
                        },
                        None => {
                            downstream_custom_write = false;
                            if let Some(ref mut custom_writer) = downstream_custom_message_writer {
                                custom_writer.finish_custom().await?;
                            }
                        },
                    }
                },

                else => {
                    break;
                }
            }
        }

        if let Some(custom_session) = session.downstream_session.as_custom_mut() {
            if let Some(downstream_custom_message_reader) = downstream_custom_message_reader {
                custom_session
                    .restore_custom_message_reader(downstream_custom_message_reader)
                    .expect("downstream restore_custom_message_reader should be empty");
            }
        }

        let mut reuse_downstream = !downstream_state.is_errored();
        if reuse_downstream {
            match session.as_mut().finish_body().await {
                Ok(_) => {
                    debug!("finished sending body to downstream");
                }
                Err(e) => {
                    error!("Error finish sending body to downstream: {}", e);
                    reuse_downstream = false;
                }
            }
        }
        Ok(reuse_downstream)
    }

    async fn h1_response_filter(
        &self,
        session: &mut Session,
        mut task: HttpTask,
        ctx: &mut SV::CTX,
        serve_from_cache: &mut ServeFromCache,
        range_body_filter: &mut RangeBodyFilter,
        from_cache: bool, // are the task from cache already
    ) -> Result<HttpTask>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        // skip caching if already served from cache
        if !from_cache {
            if let Some(duration) = self.upstream_filter(session, &mut task, ctx).await? {
                trace!("delaying upstream response for {duration:?}");
                time::sleep(duration).await;
            }

            // cache the original response before any downstream transformation
            // requests that bypassed cache still need to run filters to see if the response has become cacheable
            if session.cache.enabled() || session.cache.bypassing() {
                if let Err(e) = self
                    .cache_http_task(session, &task, ctx, serve_from_cache)
                    .await
                {
                    session.cache.disable(NoCacheReason::StorageError);
                    if serve_from_cache.is_miss_body() {
                        // if the response stream cache body during miss but write fails, it has to
                        // give up the entire request
                        return Err(e);
                    } else {
                        // otherwise, continue processing the response
                        warn!(
                            "Fail to cache response: {}, {}",
                            e,
                            self.inner.request_summary(session, ctx)
                        );
                    }
                }
            }

            if !serve_from_cache.should_send_to_downstream() {
                return Ok(task);
            }
        } // else: cached/local response, no need to trigger upstream filters and caching

        // normally max file size is tracked in cache_http_task filters (when cache enabled),
        // we will track it in these filters before sending to downstream on specific conditions
        // when cache is disabled
        let track_max_cache_size = matches!(
            session.cache.phase(),
            CachePhase::Disabled(NoCacheReason::PredictedResponseTooLarge)
        );

        let res = match task {
            HttpTask::Header(mut header, end) => {
                /* Downstream revalidation/range, only needed when cache modified headers because otherwise origin
                 * will handle it */
                if session.upstream_headers_mutated_for_cache() {
                    self.downstream_response_conditional_filter(
                        serve_from_cache,
                        session,
                        &mut header,
                        ctx,
                    );
                    if !session.ignore_downstream_range {
                        let range_type = self.inner.range_header_filter(session, &mut header, ctx);
                        range_body_filter.set(range_type);
                    }
                }

                // TODO: just set version to Version::HTTP_11 unconditionally here,
                // (with another todo being an option to faithfully proxy the <1.1 responses)
                // as we are already trying to mutate this for HTTP/1.1 downstream reuse

                /* Convert HTTP 1.0 style response to chunked encoding so that we don't
                 * have to close the downstream connection */
                // these status codes / method cannot have body, so no need to add chunked encoding
                let no_body = session.req_header().method == http::method::Method::HEAD
                    || matches!(header.status.as_u16(), 204 | 304);
                if !no_body
                    && !header.status.is_informational()
                    && header
                        .headers
                        .get(http::header::TRANSFER_ENCODING)
                        .is_none()
                    && header.headers.get(http::header::CONTENT_LENGTH).is_none()
                    && !end
                {
                    // Upgrade the http version to 1.1 because 1.0/0.9 doesn't support chunked
                    header.set_version(Version::HTTP_11);
                    header.insert_header(http::header::TRANSFER_ENCODING, "chunked")?;
                }

                match self.inner.response_filter(session, &mut header, ctx).await {
                    Ok(_) => Ok(HttpTask::Header(header, end)),
                    Err(e) => Err(e),
                }
            }
            HttpTask::Body(data, end) => {
                if track_max_cache_size {
                    session
                        .cache
                        .track_body_bytes_for_max_file_size(data.as_ref().map_or(0, |d| d.len()));
                }

                // before it can mark it as cacheable again.
                let mut data = range_body_filter.filter_body(data);
                if let Some(duration) = self
                    .inner
                    .response_body_filter(session, &mut data, end, ctx)?
                {
                    trace!("delaying downstream response for {:?}", duration);
                    time::sleep(duration).await;
                }

                Ok(HttpTask::Body(data, end))
            }
            HttpTask::UpgradedBody(mut data, end) => {
                if track_max_cache_size {
                    session
                        .cache
                        .track_body_bytes_for_max_file_size(data.as_ref().map_or(0, |d| d.len()));
                }

                // range doesn't apply to upgraded body
                if let Some(duration) = self
                    .inner
                    .response_body_filter(session, &mut data, end, ctx)?
                {
                    trace!("delaying downstream upgraded response for {:?}", duration);
                    time::sleep(duration).await;
                }

                Ok(HttpTask::UpgradedBody(data, end))
            }
            HttpTask::Trailer(h) => Ok(HttpTask::Trailer(h)), // TODO: support trailers for h1
            HttpTask::Done => Ok(task),
            HttpTask::Failed(_) => Ok(task), // Do nothing just pass the error down
        };
        // On end, check if the response (based on file size) can be considered cacheable again
        if let Ok(task) = res.as_ref() {
            if track_max_cache_size
                && task.is_end()
                && !matches!(task, HttpTask::Failed(_))
                && !session.cache.exceeded_max_file_size()
            {
                session.cache.response_became_cacheable();
            }
        }
        res
    }

    // TODO:: use this function to replace send_body_to2
    async fn send_body_to_pipe(
        &self,
        session: &mut Session,
        mut data: Option<Bytes>,
        end_of_body: bool,
        tx: mpsc::Permit<'_, HttpTask>,
        ctx: &mut SV::CTX,
    ) -> Result<bool>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        // None: end of body
        // this var is to signal if downstream finish sending the body, which shouldn't be
        // affected by the request_body_filter
        let end_of_body = end_of_body || data.is_none();

        session
            .downstream_modules_ctx
            .request_body_filter(&mut data, end_of_body)
            .await?;

        // TODO: request body filter to have info about upgraded status?
        // (can also check session.was_upgraded())
        self.inner
            .request_body_filter(session, &mut data, end_of_body, ctx)
            .await?;

        // the flag to signal to upstream
        let upstream_end_of_body = end_of_body || data.is_none();

        /* It is normal to get 0 bytes because of multi-chunk or request_body_filter decides not to
         * output anything yet.
         * Don't write 0 bytes to the network since it will be
         * treated as the terminating chunk */
        if !upstream_end_of_body && data.as_ref().is_some_and(|d| d.is_empty()) {
            return Ok(false);
        }

        debug!(
            "Read {} bytes body from downstream",
            data.as_ref().map_or(-1, |d| d.len() as isize)
        );

        // upgraded body needs to be marked
        if session.was_upgraded() {
            tx.send(HttpTask::UpgradedBody(data, upstream_end_of_body));
        } else {
            tx.send(HttpTask::Body(data, upstream_end_of_body));
        }

        Ok(end_of_body)
    }
}

pub(crate) async fn send_body_to1(
    client_session: &mut HttpSessionV1,
    recv_task: Option<HttpTask>,
) -> Result<bool> {
    let body_done;

    if let Some(task) = recv_task {
        match task {
            HttpTask::Body(data, end) => {
                body_done = end;
                if let Some(d) = data {
                    let m = client_session.write_body(&d).await;
                    match m {
                        Ok(m) => match m {
                            Some(n) => {
                                debug!("Write {} bytes body to upstream", n);
                            }
                            None => {
                                warn!("Upstream body is already finished. Nothing to write");
                            }
                        },
                        Err(e) => {
                            return e.into_up().into_err();
                        }
                    }
                }
            }
            HttpTask::UpgradedBody(data, end) => {
                client_session.maybe_upgrade_body_writer();

                body_done = end;
                if let Some(d) = data {
                    let m = client_session.write_body(&d).await;
                    match m {
                        Ok(m) => {
                            match m {
                                Some(n) => {
                                    debug!("Write {} bytes upgraded body to upstream", n);
                                }
                                None => {
                                    warn!("Upstream upgraded body is already finished. Nothing to write");
                                }
                            }
                        }
                        Err(e) => {
                            return e.into_up().into_err();
                        }
                    }
                }
            }
            _ => {
                // should never happen, sender only sends body
                warn!("Unexpected task sent to upstream");
                body_done = true;
                // error here,
                // for client sessions that received upgrade but didn't
                // receive any UpgradedBody,
                // no more data is arriving so we should consider this
                // as downstream finalizing its upgrade payload
                client_session.maybe_upgrade_body_writer();
            }
        }
    } else {
        // sender dropped
        body_done = true;
        // for client sessions that received upgrade but didn't
        // receive any UpgradedBody,
        // no more data is arriving so we should consider this
        // as downstream finalizing its upgrade payload
        client_session.maybe_upgrade_body_writer();
    }

    if body_done {
        match client_session.finish_body().await {
            Ok(_) => {
                debug!("finish sending body to upstream");
                Ok(true)
            }
            Err(e) => e.into_up().into_err(),
        }
    } else {
        Ok(false)
    }
}


================================================
FILE: pingora-proxy/src/proxy_h2.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use futures::future::OptionFuture;
use futures::StreamExt;

use super::*;
use crate::proxy_cache::{range_filter::RangeBodyFilter, ServeFromCache};
use crate::proxy_common::*;
use http::{header::CONTENT_LENGTH, Method, StatusCode};
use pingora_cache::CachePhase;
use pingora_core::protocols::http::custom::CUSTOM_MESSAGE_QUEUE_SIZE;
use pingora_core::protocols::http::v2::{client::Http2Session, write_body};

// add scheme and authority as required by h2 lib
fn update_h2_scheme_authority(
    header: &mut http::request::Parts,
    raw_host: &[u8],
    tls: bool,
) -> Result<()> {
    let authority = if let Ok(s) = std::str::from_utf8(raw_host) {
        if s.starts_with('[') {
            // don't mess with ipv6 host
            s
        } else if let Some(colon) = s.find(':') {
            if s.len() == colon + 1 {
                // colon is the last char, ignore
                s
            } else if let Some(another_colon) = s[colon + 1..].find(':') {
                // try to get rid of extra port numbers
                &s[..colon + 1 + another_colon]
            } else {
                s
            }
        } else {
            s
        }
    } else {
        return Error::e_explain(
            InvalidHTTPHeader,
            format!("invalid authority from host {:?}", raw_host),
        );
    };

    let scheme = if tls { "https" } else { "http" };
    let uri = http::uri::Builder::new()
        .scheme(scheme)
        .authority(authority)
        .path_and_query(header.uri.path_and_query().as_ref().unwrap().as_str())
        .build();
    match uri {
        Ok(uri) => {
            header.uri = uri;
            Ok(())
        }
        Err(_) => Error::e_explain(
            InvalidHTTPHeader,
            format!("invalid authority from host {}", authority),
        ),
    }
}

impl<SV, C> HttpProxy<SV, C>
where
    C: custom::Connector,
{
    pub(crate) async fn proxy_down_to_up(
        &self,
        session: &mut Session,
        client_session: &mut Http2Session,
        peer: &HttpPeer,
        ctx: &mut SV::CTX,
    ) -> (bool, Option<Box<Error>>)
    // (reuse_server, error)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        let mut req = session.req_header().clone();

        if req.version != Version::HTTP_2 {
            /* remove H1 specific headers */
            // https://github.com/hyperium/h2/blob/d3b9f1e36aadc1a7a6804e2f8e86d3fe4a244b4f/src/proto/streams/send.rs#L72
            req.remove_header(&http::header::TRANSFER_ENCODING);
            req.remove_header(&http::header::CONNECTION);
            req.remove_header(&http::header::UPGRADE);
            req.remove_header("keep-alive");
            req.remove_header("proxy-connection");
        }

        /* turn it into h2 */
        req.set_version(Version::HTTP_2);

        if session.cache.enabled() {
            pingora_cache::filters::upstream::request_filter(
                &mut req,
                session.cache.maybe_cache_meta(),
            );
            session.mark_upstream_headers_mutated_for_cache();
        }

        match self
            .inner
            .upstream_request_filter(session, &mut req, ctx)
            .await
        {
            Ok(_) => { /* continue */ }
            Err(e) => {
                return (false, Some(e));
            }
        }

        // Remove H1 `Host` header, save it in order to add to :authority
        // We do this because certain H2 servers expect request not to have a host header.
        // The `Host` is removed after the upstream filters above for 2 reasons
        // 1. there is no API to change the :authority header
        // 2. the filter code needs to be aware of the host vs :authority across http versions otherwise
        let host = req.remove_header(&http::header::HOST);

        session.upstream_compression.request_filter(&req);
        let body_empty = session.as_mut().is_body_empty();

        // whether we support sending END_STREAM on HEADERS if body is empty
        let send_end_stream = req.send_end_stream().expect("req must be h2");

        let mut req: http::request::Parts = req.into();

        // H2 requires authority to be set, so copy that from H1 host if that is set
        if let Some(host) = host {
            if let Err(e) = update_h2_scheme_authority(&mut req, host.as_bytes(), peer.is_tls()) {
                return (false, Some(e));
            }
        }

        debug!("Request to h2: {req:?}");

        // send END_STREAM on HEADERS
        let send_header_eos = send_end_stream && body_empty;
        debug!("send END_STREAM on HEADERS: {send_end_stream}");

        let req = Box::new(RequestHeader::from(req));
        if let Err(e) = client_session.write_request_header(req, send_header_eos) {
            return (false, Some(e.into_up()));
        }

        if !send_end_stream && body_empty {
            // send END_STREAM on empty DATA frame
            match client_session.write_request_body(Bytes::new(), true).await {
                Ok(()) => debug!("sent empty DATA frame to h2"),
                Err(e) => {
                    return (false, Some(e.into_up()));
                }
            }
        }

        client_session.read_timeout = peer.options.read_timeout;

        let mut downstream_custom_message_writer = session
            .downstream_session
            .as_custom_mut()
            .and_then(|c| c.take_custom_message_writer());

        // take the body writer out of the client for easy duplex
        let mut client_body = client_session
            .take_request_body_writer()
            .expect("already send request header");

        // need to get the write_timeout here since we pass the h2 SendStream
        // directly to bidirection_down_to_up
        let write_timeout = peer.options.write_timeout;

        let (tx, rx) = mpsc::channel::<HttpTask>(TASK_BUFFER_SIZE);

        session.as_mut().enable_retry_buffering();

        /* read downstream body and upstream response at the same time */

        let ret = tokio::try_join!(
            self.bidirection_down_to_up(
                session,
                &mut client_body,
                rx,
                ctx,
                write_timeout,
                &mut downstream_custom_message_writer
            ),
            pipe_up_to_down_response(client_session, tx)
        );

        if let Some(custom_session) = session.downstream_session.as_custom_mut() {
            if let Some(downstream_custom_message_writer) = downstream_custom_message_writer {
                match custom_session.restore_custom_message_writer(downstream_custom_message_writer)
                {
                    Ok(_) => { /* continue */ }
                    Err(e) => {
                        return (false, Some(e));
                    }
                }
            }
        }

        match ret {
            Ok((downstream_can_reuse, _upstream)) => (downstream_can_reuse, None),
            Err(e) => {
                // On application level upstream read timeouts, send RST_STREAM CANCEL,
                // we know we have not received END_STREAM at this point since we read timed out
                // TODO: implement for write timeouts?
                if e.esource == ErrorSource::Upstream && matches!(e.etype, ReadTimedout) {
                    client_body.send_reset(h2::Reason::CANCEL);
                }
                (false, Some(e))
            }
        }
    }

    pub(crate) async fn proxy_to_h2_upstream(
        &self,
        session: &mut Session,
        client_session: &mut Http2Session,
        reused: bool,
        peer: &HttpPeer,
        ctx: &mut SV::CTX,
    ) -> (bool, Option<Box<Error>>)
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        #[cfg(windows)]
        let raw = client_session.fd() as std::os::windows::io::RawSocket;
        #[cfg(unix)]
        let raw = client_session.fd();

        if let Err(e) = self
            .inner
            .connected_to_upstream(session, reused, peer, raw, client_session.digest(), ctx)
            .await
        {
            return (false, Some(e));
        }

        let (server_session_reuse, error) = self
            .proxy_down_to_up(session, client_session, peer, ctx)
            .await;

        // Record upstream response body bytes received (HTTP/2 DATA payload).
        let upstream_bytes_total = client_session.body_bytes_received();
        session.set_upstream_body_bytes_received(upstream_bytes_total);

        // Note: upstream_write_pending_time is not tracked for HTTP/2 (multiplexed streams).

        (server_session_reuse, error)
    }

    // returns whether server (downstream) session can be reused
    async fn bidirection_down_to_up(
        &self,
        session: &mut Session,
        client_body: &mut h2::SendStream<bytes::Bytes>,
        mut rx: mpsc::Receiver<HttpTask>,
        ctx: &mut SV::CTX,
        write_timeout: Option<Duration>,
        downstream_custom_message_writer: &mut Option<Box<dyn CustomMessageWrite>>,
    ) -> Result<bool>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        // setup custom message forwarding, if downstream supports it
        let (
            mut downstream_custom_read,
            mut downstream_custom_write,
            downstream_custom_message_custom_forwarding,
            mut downstream_custom_message_inject_rx,
            mut downstream_custom_message_reader,
        ) = if downstream_custom_message_writer.is_some() {
            let reader = session.downstream_custom_message()?;
            let (inject_tx, inject_rx) = mpsc::channel::<Bytes>(CUSTOM_MESSAGE_QUEUE_SIZE);
            (true, true, Some(inject_tx), Some(inject_rx), reader)
        } else {
            (false, false, None, None, None)
        };

        if let Some(custom_forwarding) = downstream_custom_message_custom_forwarding {
            self.inner
                .custom_forwarding(session, ctx, None, custom_forwarding)
                .await?;
        }

        let mut downstream_state = DownstreamStateMachine::new(session.as_mut().is_body_done());

        // retry, send buffer if it exists
        if let Some(buffer) = session.as_mut().get_retry_buffer() {
            self.send_body_to2(
                session,
                Some(buffer),
                downstream_state.is_done(),
                client_body,
                ctx,
                write_timeout,
            )
            .await?;
        }

        let mut response_state = ResponseStateMachine::new();

        // these two below can be wrapped into an internal ctx
        // use cache when upstream revalidates (or TODO: error)
        let mut serve_from_cache = ServeFromCache::new();
        let mut range_body_filter = proxy_cache::range_filter::RangeBodyFilter::new();

        /* duplex mode
         * see the Same function for h1 for more comments
         */
        while !downstream_state.is_done()
            || !response_state.is_done()
            || downstream_custom_read && !downstream_state.is_errored()
            || downstream_custom_write
        {
            // Use optional futures to allow using optional channels in select branches
            let custom_inject_rx_recv: OptionFuture<_> = downstream_custom_message_inject_rx
                .as_mut()
                .map(|rx| rx.recv())
                .into();
            let custom_reader_next: OptionFuture<_> = downstream_custom_message_reader
                .as_mut()
                .map(|reader| reader.next())
                .into();

            // partial read support, this check will also be false if cache is disabled.
            let support_cache_partial_read =
                session.cache.support_streaming_partial_write() == Some(true);
            let upgraded = session.was_upgraded();

            // Similar logic in h1 need to reserve capacity first to avoid deadlock
            // But we don't need to do the same because the h2 client_body pipe is unbounded (never block)
            tokio::select! {
                // NOTE: cannot avoid this copy since h2 owns the buf
                body = session.downstream_session.read_body_or_idle(downstream_state.is_done()), if downstream_state.can_poll() => {
                    debug!("downstream event");
                    let body = match body {
                        Ok(b) => b,
                        Err(e) => {
                            let wait_for_cache_fill = (!serve_from_cache.is_on() && support_cache_partial_read)
                                || serve_from_cache.is_miss();
                            if wait_for_cache_fill {
                                // ignore downstream error so that upstream can continue to write cache
                                downstream_state.to_errored();
                                warn!(
                                    "Downstream Error ignored during caching: {}, {}",
                                    e,
                                    self.inner.request_summary(session, ctx)
                                );
                                // This will not be treated as a final error, but we should signal to
                                // downstream session regardless
                                session.downstream_session.on_proxy_failure(e);
                                continue;
                           } else {
                                return Err(e.into_down());
                           }
                        }
                    };
                    let is_body_done = session.is_body_done();
                    match self.send_body_to2(session, body, is_body_done, client_body, ctx, write_timeout).await {
                        Ok(request_done) =>  {
                            downstream_state.maybe_finished(request_done);
                        },
                        Err(e) => {
                            // mark request done, attempt to drain receive
                            warn!("Upstream h2 body send error: {e}");
                            // upstream is what actually errored but we don't want to continue
                            // polling the downstream body
                            downstream_state.to_errored();
                        }
                    };
                },

                task = rx.recv(), if !response_state.upstream_done() => {
                    if let Some(t) = task {
                        debug!("upstream event: {:?}", t);
                        if serve_from_cache.should_discard_upstream() {
                            // just drain, do we need to do anything else?
                           continue;
                        }
                        // pull as many tasks as we can
                        let mut tasks = Vec::with_capacity(TASK_BUFFER_SIZE);
                        tasks.push(t);
                        // tokio::task::unconstrained because now_or_never may yield None when the future is ready
                        while let Some(maybe_task) = tokio::task::unconstrained(rx.recv()).now_or_never() {
                            if let Some(t) = maybe_task {
                                tasks.push(t);
                            } else {
                                break
                            }
                        }

                        /* run filters before sending to downstream */
                        let mut filtered_tasks = Vec::with_capacity(TASK_BUFFER_SIZE);
                        for mut t in tasks {
                            if self.revalidate_or_stale(session, &mut t, ctx).await {
                                serve_from_cache.enable();
                                response_state.enable_cached_response();
                                // skip downstream filtering entirely as the 304 will not be sent
                                break;
                            }
                            session.upstream_compression.response_filter(&mut t);
                            // check error and abort
                            // otherwise the error is surfaced via write_response_tasks()
                            if !serve_from_cache.should_send_to_downstream() {
                                if let HttpTask::Failed(e) = t {
                                    return Err(e);
                                }
                            }
                            filtered_tasks.push(
                                self.h2_response_filter(session, t, ctx,
                                    &mut serve_from_cache,
                                    &mut range_body_filter, false).await?);
                            if serve_from_cache.is_miss_header() {
                                response_state.enable_cached_response();
                            }
                        }

                        if !serve_from_cache.should_send_to_downstream() {
                            // TODO: need to derive response_done from filtered_tasks in case downstream failed already
                            continue;
                        }

                        let response_done = session.write_response_tasks(filtered_tasks).await?;
                        if session.was_upgraded() {
                            // it is very weird if the downstream session decides to upgrade
                            // since the client h2 session cannot, return an error on this case
                            return Error::e_explain(H2Error, "upgraded while proxying to h2 session");
                        }
                        response_state.maybe_set_upstream_done(response_done);
                    } else {
                        debug!("empty upstream event");
                        response_state.maybe_set_upstream_done(true);
                    }
                }

                task = serve_from_cache.next_http_task(&mut session.cache, &mut range_body_filter, upgraded),
                    if !response_state.cached_done() && !downstream_state.is_errored() && serve_from_cache.is_on() => {
                    let task = self.h2_response_filter(session, task?, ctx,
                        &mut serve_from_cache,
                        &mut range_body_filter, true).await?;
                    debug!("serve_from_cache task {task:?}");

                    match session.write_response_tasks(vec![task]).await {
                        Ok(b) => response_state.maybe_set_cache_done(b),
                        Err(e) => if serve_from_cache.is_miss() {
                            // give up writing to downstream but wait for upstream cache write to finish
                            downstream_state.to_errored();
                            response_state.maybe_set_cache_done(true);
                            warn!(
                                "Downstream Error ignored during caching: {}, {}",
                                e,
                                self.inner.request_summary(session, ctx)
                            );
                            // This will not be treated as a final error, but we should signal to
                            // downstream session regardless
                            session.downstream_session.on_proxy_failure(e);
                            continue;
                        } else {
                            return Err(e);
                        }
                    }
                    if response_state.cached_done() {
                        if let Err(e) = session.cache.finish_hit_handler().await {
                            warn!("Error during finish_hit_handler: {}", e);
                        }
                    }
                }
                data = custom_reader_next, if downstream_custom_read && !downstream_state.is_errored()  => {
                    let Some(data) = data.flatten() else {

                        downstream_custom_read = false;
                        continue;
                    };

                    let data = match data {
                        Ok(data) => data,
                        Err(err) =>  {
                            warn!("downstream_custom_message_reader got error: {err}");
                            downstream_custom_read = false;
                            continue;
                        },
                    };

                    self.inner
                        .downstream_custom_message_proxy_filter(session, data, ctx, true) // true, because it's the last hop for downstream proxying
                        .await?;
                },

                data = custom_inject_rx_recv, if downstream_custom_write => {
                    match data.flatten() {
                        Some(data) => {
                            if let Some(ref mut custom_writer) = downstream_custom_message_writer {
                                custom_writer.write_custom_message(data).await?
                            }
                        },
                        None => {
                            downstream_custom_write = false;
                            if let Some(ref mut custom_writer) = downstream_custom_message_writer {
                                custom_writer.finish_custom().await?;
                            }
                        },
                    }
                },

                else => {
                    break;
                }
            }
        }

        if let Some(custom_session) = session.downstream_session.as_custom_mut() {
            if let Some(downstream_custom_message_reader) = downstream_custom_message_reader {
                custom_session
                    .restore_custom_message_reader(downstream_custom_message_reader)
                    .expect("downstream restore_custom_message_reader should be empty");
            }
        }

        let mut reuse_downstream = !downstream_state.is_errored();
        if reuse_downstream {
            match session.as_mut().finish_body().await {
                Ok(_) => {
                    debug!("finished sending body to downstream");
                }
                Err(e) => {
                    error!("Error finish sending body to downstream: {}", e);
                    reuse_downstream = false;
                }
            }
        }
        Ok(reuse_downstream)
    }

    async fn h2_response_filter(
        &self,
        session: &mut Session,
        mut task: HttpTask,
        ctx: &mut SV::CTX,
        serve_from_cache: &mut ServeFromCache,
        range_body_filter: &mut RangeBodyFilter,
        from_cache: bool, // are the task from cache already
    ) -> Result<HttpTask>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        if !from_cache {
            if let Some(duration) = self.upstream_filter(session, &mut task, ctx).await? {
                trace!("delaying upstream response for {duration:?}");
                time::sleep(duration).await;
            }

            // cache the original response before any downstream transformation
            // requests that bypassed cache still need to run filters to see if the response has become cacheable
            if session.cache.enabled() || session.cache.bypassing() {
                if let Err(e) = self
                    .cache_http_task(session, &task, ctx, serve_from_cache)
                    .await
                {
                    session.cache.disable(NoCacheReason::StorageError);
                    if serve_from_cache.is_miss_body() {
                        // if the response stream cache body during miss but write fails, it has to
                        // give up the entire request
                        return Err(e);
                    } else {
                        // otherwise, continue processing the response
                        warn!(
                            "Fail to cache response: {}, {}",
                            e,
                            self.inner.request_summary(session, ctx)
                        );
                    }
                }
            }
            // skip the downstream filtering if these tasks are just for cache admission
            if !serve_from_cache.should_send_to_downstream() {
                return Ok(task);
            }
        } // else: cached/local response, no need to trigger upstream filters and caching

        // normally max file size is tracked in cache_http_task filters (when cache enabled),
        // we will track it in these filters before sending to downstream on specific conditions
        // when cache is disabled
        let track_max_cache_size = matches!(
            session.cache.phase(),
            CachePhase::Disabled(NoCacheReason::PredictedResponseTooLarge)
        );

        let res = match task {
            HttpTask::Header(mut header, eos) => {
                /* Downstream revalidation, only needed when cache is on because otherwise origin
                 * will handle it */
                if session.upstream_headers_mutated_for_cache() {
                    self.downstream_response_conditional_filter(
                        serve_from_cache,
                        session,
                        &mut header,
                        ctx,
                    );
                    if !session.ignore_downstream_range {
                        let range_type = self.inner.range_header_filter(session, &mut header, ctx);
                        range_body_filter.set(range_type);
                    }
                }

                self.inner
                    .response_filter(session, &mut header, ctx)
                    .await?;
                /* Downgrade the version so that write_response_header won't panic */
                header.set_version(Version::HTTP_11);

                // these status codes / method cannot have body, so no need to add chunked encoding
                let no_body = session.req_header().method == "HEAD"
                    || matches!(header.status.as_u16(), 204 | 304);

                /* Add chunked header to tell downstream to use chunked encoding
                 * during the absent of content-length in h2 */
                if !no_body
                    && !header.status.is_informational()
                    && header.headers.get(http::header::CONTENT_LENGTH).is_none()
                {
                    header.insert_header(http::header::TRANSFER_ENCODING, "chunked")?;
                }
                Ok(HttpTask::Header(header, eos))
            }
            HttpTask::Body(data, eos) => {
                if track_max_cache_size {
                    session
                        .cache
                        .track_body_bytes_for_max_file_size(data.as_ref().map_or(0, |d| d.len()));
                }

                let mut data = range_body_filter.filter_body(data);
                if let Some(duration) = self
                    .inner
                    .response_body_filter(session, &mut data, eos, ctx)?
                {
                    trace!("delaying downstream response for {duration:?}");
                    time::sleep(duration).await;
                }
                Ok(HttpTask::Body(data, eos))
            }
            HttpTask::UpgradedBody(..) => {
                // An h2 session should not be able to send an h2 upgraded response body,
                // and logically that is impossible unless there is a bug in the client v2 session
                panic!("Unexpected UpgradedBody task while proxy h2");
            }
            HttpTask::Trailer(mut trailers) => {
                let trailer_buffer = match trailers.as_mut() {
                    Some(trailers) => {
                        debug!("Parsing response trailers..");
                        match self
                            .inner
                            .response_trailer_filter(session, trailers, ctx)
                            .await
                        {
                            Ok(buf) => buf,
                            Err(e) => {
                                error!(
                                    "Encountered error while filtering upstream trailers {:?}",
                                    e
                                );
                                None
                            }
                        }
                    }
                    _ => None,
                };
                // if we have a trailer buffer write it to the downstream response body
                if let Some(buffer) = trailer_buffer {
                    // write_body will not write additional bytes after reaching the content-length
                    // for gRPC H2 -> H1 this is not a problem but may be a problem for non gRPC code
                    // https://http2.github.io/http2-spec/#malformed
                    Ok(HttpTask::Body(Some(buffer), true))
                } else {
                    Ok(HttpTask::Trailer(trailers))
                }
            }
            HttpTask::Done => Ok(task),
            HttpTask::Failed(_) => Ok(task), // Do nothing just pass the error down
        };
        // On end, check if the response (based on file size) can be considered cacheable again
        if let Ok(task) = res.as_ref() {
            if track_max_cache_size
                && task.is_end()
                && !matches!(task, HttpTask::Failed(_))
                && !session.cache.exceeded_max_file_size()
            {
                session.cache.response_became_cacheable();
            }
        }
        res
    }

    async fn send_body_to2(
        &self,
        session: &mut Session,
        mut data: Option<Bytes>,
        end_of_body: bool,
        client_body: &mut h2::SendStream<bytes::Bytes>,
        ctx: &mut SV::CTX,
        write_timeout: Option<Duration>,
    ) -> Result<bool>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        session
            .downstream_modules_ctx
            .request_body_filter(&mut data, end_of_body)
            .await?;

        self.inner
            .request_body_filter(session, &mut data, end_of_body, ctx)
            .await?;

        /* it is normal to get 0 bytes because of multi-chunk parsing or request_body_filter.
         * Although there is no harm writing empty byte to h2, unlike h1, we ignore it
         * for consistency */
        if !end_of_body && data.as_ref().is_some_and(|d| d.is_empty()) {
            return Ok(false);
        }

        if let Some(data) = data {
            debug!("Write {} bytes body to h2 upstream", data.len());
            write_body(client_body, data, end_of_body, write_timeout)
                .await
                .map_err(|e| e.into_up())?;
        } else {
            debug!("Read downstream body done");
            /* send a standalone END_STREAM flag */
            write_body(client_body, Bytes::new(), true, write_timeout)
                .await
                .map_err(|e| e.into_up())?;
        }

        Ok(end_of_body)
    }
}

/* Read response header, body and trailer from h2 upstream and send them to tx */
pub(crate) async fn pipe_up_to_down_response(
    client: &mut Http2Session,
    tx: mpsc::Sender<HttpTask>,
) -> Result<()> {
    client
        .read_response_header()
        .await
        .map_err(|e| e.into_up())?; // should we send the error as an HttpTask?

    let resp_header = Box::new(client.response_header().expect("just read").clone());

    match client.check_response_end_or_error() {
        Ok(eos) => {
            // XXX: the h2 crate won't check for content-length underflow
            // if a header frame with END_STREAM is sent without data frames
            // As stated by RFC, "204 or 304 responses contain no content,
            // as does the response to a HEAD request"
            // https://datatracker.ietf.org/doc/html/rfc9113#section-8.1.1
            let req_header = client.request_header().expect("must have sent req");
            if eos
                && req_header.method != Method::HEAD
                && resp_header.status != StatusCode::NO_CONTENT
                && resp_header.status != StatusCode::NOT_MODIFIED
                // RFC technically allows for leading zeroes
                // https://datatracker.ietf.org/doc/html/rfc9110#name-content-length
                && resp_header
                    .headers
                    .get(CONTENT_LENGTH)
                    .is_some_and(|cl| cl.as_bytes().iter().any(|b| *b != b'0'))
            {
                let _ = tx
                    .send(HttpTask::Failed(
                        Error::explain(H2Error, "non-zero content-length on EOS headers frame")
                            .into_up(),
                    ))
                    .await;
                return Ok(());
            }
            tx.send(HttpTask::Header(resp_header, eos))
                .await
                .or_err(InternalError, "sending h2 headers to pipe")?;
        }
        Err(e) => {
            // If upstream errored, then push error to downstream and then quit
            // Don't care if send fails (which means downstream already gone)
            // we were still able to retrieve the headers, so try sending
            let _ = tx.send(HttpTask::Header(resp_header, false)).await;
            let _ = tx.send(HttpTask::Failed(e.into_up())).await;
            return Ok(());
        }
    }

    while let Some(chunk) = client
        .read_response_body()
        .await
        .map_err(|e| e.into_up())
        .transpose()
    {
        let data = match chunk {
            Ok(d) => d,
            Err(e) => {
                // Push the error to downstream and then quit
                let _ = tx.send(HttpTask::Failed(e.into_up())).await;
                // Downstream should consume all remaining data and handle the error
                return Ok(());
            }
        };
        match client.check_response_end_or_error() {
            Ok(eos) => {
                let empty = data.is_empty();
                if empty && !eos {
                    /* it is normal to get 0 bytes because of multi-chunk
                     * don't write 0 bytes to downstream since it will be
                     * misread as the terminating chunk */
                    continue;
                }
                let sent = tx
                    .send(HttpTask::Body(Some(data), eos))
                    .await
                    .or_err(InternalError, "sending h2 body to pipe");
                // If the if the response with content-length is sent to an HTTP1 downstream,
                // bidirection_down_to_up() could decide that the body has finished and exit without
                // waiting for this function to signal the eos. In this case tx being closed is not
                // an sign of error. It should happen if the only thing left for the h2 to send is
                // an empty data frame with eos set.
                if sent.is_err() && eos && empty {
                    return Ok(());
                }
                sent?;
            }
            Err(e) => {
                // Similar to above, push the error to downstream and then quit
                let _ = tx.send(HttpTask::Failed(e.into_up())).await;
                return Ok(());
            }
        }
    }

    // attempt to get trailers
    let trailers = match client.read_trailers().await {
        Ok(t) => t,
        Err(e) => {
            // Similar to above, push the error to downstream and then quit
            let _ = tx.send(HttpTask::Failed(e.into_up())).await;
            return Ok(());
        }
    };

    let trailers = trailers.map(Box::new);

    if trailers.is_some() {
        tx.send(HttpTask::Trailer(trailers))
            .await
            .or_err(InternalError, "sending h2 trailer to pipe")?;
    }

    tx.send(HttpTask::Done)
        .await
        .unwrap_or_else(|_| debug!("h2 to h1 channel closed!"));

    Ok(())
}

#[test]
fn test_update_authority() {
    let mut parts = http::request::Builder::new()
        .body(())
        .unwrap()
        .into_parts()
        .0;
    update_h2_scheme_authority(&mut parts, b"example.com", true).unwrap();
    assert_eq!("example.com", parts.uri.authority().unwrap());
    update_h2_scheme_authority(&mut parts, b"example.com:456", true).unwrap();
    assert_eq!("example.com:456", parts.uri.authority().unwrap());
    update_h2_scheme_authority(&mut parts, b"example.com:", true).unwrap();
    assert_eq!("example.com:", parts.uri.authority().unwrap());
    update_h2_scheme_authority(&mut parts, b"example.com:123:345", true).unwrap();
    assert_eq!("example.com:123", parts.uri.authority().unwrap());
    update_h2_scheme_authority(&mut parts, b"[::1]", true).unwrap();
    assert_eq!("[::1]", parts.uri.authority().unwrap());

    // verify scheme
    update_h2_scheme_authority(&mut parts, b"example.com", true).unwrap();
    assert_eq!("https://example.com", parts.uri);
    update_h2_scheme_authority(&mut parts, b"example.com", false).unwrap();
    assert_eq!("http://example.com", parts.uri);
}


================================================
FILE: pingora-proxy/src/proxy_purge.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::*;
use pingora_core::protocols::http::error_resp;
use std::borrow::Cow;

#[derive(Debug)]
pub enum PurgeStatus {
    /// Cache was not enabled, purge ineffectual.
    NoCache,
    /// Asset was found in cache (and presumably purged or being purged).
    Found,
    /// Asset was not found in cache.
    NotFound,
    /// Cache returned a purge error.
    /// Contains causing error in case it should affect the downstream response.
    Error(Box<Error>),
}

// Return a canned response to a purge request, based on whether the cache had the asset or not
// (or otherwise returned an error).
fn purge_response(purge_status: &PurgeStatus) -> Cow<'static, ResponseHeader> {
    let resp = match purge_status {
        PurgeStatus::NoCache => &*NOT_PURGEABLE,
        PurgeStatus::Found => &*OK,
        PurgeStatus::NotFound => &*NOT_FOUND,
        PurgeStatus::Error(ref _e) => &*INTERNAL_ERROR,
    };
    Cow::Borrowed(resp)
}

fn gen_purge_response(code: u16) -> ResponseHeader {
    let mut resp = ResponseHeader::build(code, Some(3)).unwrap();
    resp.insert_header(header::SERVER, &SERVER_NAME[..])
        .unwrap();
    resp.insert_header(header::CONTENT_LENGTH, 0).unwrap();
    resp.insert_header(header::CACHE_CONTROL, "private, no-store")
        .unwrap();
    // TODO more headers?
    resp
}

static OK: Lazy<ResponseHeader> = Lazy::new(|| gen_purge_response(200));
static NOT_FOUND: Lazy<ResponseHeader> = Lazy::new(|| gen_purge_response(404));
// for when purge is sent to uncacheable assets
static NOT_PURGEABLE: Lazy<ResponseHeader> = Lazy::new(|| gen_purge_response(405));
// on cache storage or proxy error
static INTERNAL_ERROR: Lazy<ResponseHeader> = Lazy::new(|| error_resp::gen_error_response(500));

impl<SV, C> HttpProxy<SV, C>
where
    C: custom::Connector,
{
    pub(crate) async fn proxy_purge(
        &self,
        session: &mut Session,
        ctx: &mut SV::CTX,
    ) -> Option<(bool, Option<Box<Error>>)>
    where
        SV: ProxyHttp + Send + Sync,
        SV::CTX: Send + Sync,
    {
        let purge_status = if session.cache.enabled() {
            match session.cache.purge().await {
                Ok(found) => {
                    if found {
                        PurgeStatus::Found
                    } else {
                        PurgeStatus::NotFound
                    }
                }
                Err(e) => {
                    session.cache.disable(NoCacheReason::StorageError);
                    warn!(
                        "Fail to purge cache: {e}, {}",
                        self.inner.request_summary(session, ctx)
                    );
                    PurgeStatus::Error(e)
                }
            }
        } else {
            // cache was not enabled
            PurgeStatus::NoCache
        };

        let mut purge_resp = purge_response(&purge_status);
        if let Err(e) =
            self.inner
                .purge_response_filter(session, ctx, purge_status, &mut purge_resp)
        {
            error!(
                "Failed purge response filter: {e}, {}",
                self.inner.request_summary(session, ctx)
            );
            purge_resp = Cow::Borrowed(&*INTERNAL_ERROR)
        }

        let write_result = match purge_resp {
            Cow::Borrowed(r) => session.as_mut().write_response_header_ref(r).await,
            Cow::Owned(r) => session.as_mut().write_response_header(Box::new(r)).await,
        };
        let (reuse, err) = match write_result {
            Ok(_) => (true, None),
            // dirty, not reusable
            Err(e) => {
                let e = e.into_down();
                error!(
                    "Failed to send purge response: {e}, {}",
                    self.inner.request_summary(session, ctx)
                );
                (false, Some(e))
            }
        };
        Some((reuse, err))
    }
}


================================================
FILE: pingora-proxy/src/proxy_trait.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::*;
use pingora_cache::{
    key::HashBinary,
    CacheKey, CacheMeta, ForcedFreshness, HitHandler,
    RespCacheable::{self, *},
};
use proxy_cache::range_filter::{self};
use std::time::Duration;

/// The interface to control the HTTP proxy
///
/// The methods in [ProxyHttp] are filters/callbacks which will be performed on all requests at their
/// particular stage (if applicable).
///
/// If any of the filters returns [Result::Err], the request will fail, and the error will be logged.
#[cfg_attr(not(doc_async_trait), async_trait)]
pub trait ProxyHttp {
    /// The per request object to share state across the different filters
    type CTX;

    /// Define how the `ctx` should be created.
    fn new_ctx(&self) -> Self::CTX;

    /// Define where the proxy should send the request to.
    ///
    /// The returned [HttpPeer] contains the information regarding where and how this request should
    /// be forwarded to.
    async fn upstream_peer(
        &self,
        session: &mut Session,
        ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>>;

    /// Set up downstream modules.
    ///
    /// In this phase, users can add or configure [HttpModules] before the server starts up.
    ///
    /// In the default implementation of this method, [ResponseCompressionBuilder] is added
    /// and disabled.
    fn init_downstream_modules(&self, modules: &mut HttpModules) {
        // Add disabled downstream compression module by default
        modules.add_module(ResponseCompressionBuilder::enable(0));
    }

    /// Handle the incoming request.
    ///
    /// In this phase, users can parse, validate, rate limit, perform access control and/or
    /// return a response for this request.
    ///
    /// If the user already sent a response to this request, an `Ok(true)` should be returned so that
    /// the proxy would exit. The proxy continues to the next phases when `Ok(false)` is returned.
    ///
    /// By default this filter does nothing and returns `Ok(false)`.
    async fn request_filter(&self, _session: &mut Session, _ctx: &mut Self::CTX) -> Result<bool>
    where
        Self::CTX: Send + Sync,
    {
        Ok(false)
    }

    /// Handle the incoming request before any downstream module is executed.
    ///
    /// This function is similar to [Self::request_filter()] but executes before any other logic,
    /// including downstream module logic. The main purpose of this function is to provide finer
    /// grained control of the behavior of the modules.
    ///
    /// Note that because this function is executed before any module that might provide access
    /// control or rate limiting, logic should stay in request_filter() if it can in order to be
    /// protected by said modules.
    async fn early_request_filter(&self, _session: &mut Session, _ctx: &mut Self::CTX) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    /// Returns whether this session is allowed to spawn subrequests.
    ///
    /// This function is checked after [Self::early_request_filter] to allow that filter to configure
    /// this if required. This will also run for subrequests themselves, which may allowed to spawn
    /// their own subrequests.
    ///
    /// Note that this doesn't prevent subrequests from being spawned based on the session by proxy
    /// core functionality, e.g. background cache revalidation requires spawning subrequests.
    fn allow_spawning_subrequest(&self, _session: &Session, _ctx: &Self::CTX) -> bool
    where
        Self::CTX: Send + Sync,
    {
        false
    }

    /// Handle the incoming request body.
    ///
    /// This function will be called every time a piece of request body is received. The `body` is
    /// **not the entire request body**.
    ///
    /// The async nature of this function allows to throttle the upload speed and/or executing
    /// heavy computation logic such as WAF rules on offloaded threads without blocking the threads
    /// who process the requests themselves.
    async fn request_body_filter(
        &self,
        _session: &mut Session,
        _body: &mut Option<Bytes>,
        _end_of_stream: bool,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    /// This filter decides if the request is cacheable and what cache backend to use
    ///
    /// The caller can interact with `Session.cache` to enable caching.
    ///
    /// By default this filter does nothing which effectively disables caching.
    // Ideally only session.cache should be modified, TODO: reflect that in this interface
    fn request_cache_filter(&self, _session: &mut Session, _ctx: &mut Self::CTX) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    /// This callback generates the cache key.
    ///
    /// This callback is called only when cache is enabled for this request.
    ///
    /// There is no sensible default cache key for all proxy applications. The
    /// correct key depends on which request properties affect upstream responses
    /// (e.g. `Vary` headers, custom request filters that modify the origin host).
    /// Getting this wrong leads to cache poisoning.
    ///
    /// See `pingora-proxy/tests/utils/server_utils.rs` for a minimal (not
    /// production-ready) reference implementation.
    ///
    /// # Panics
    ///
    /// The default implementation panics. You **must** override this method when
    /// caching is enabled.
    fn cache_key_callback(&self, _session: &Session, _ctx: &mut Self::CTX) -> Result<CacheKey> {
        unimplemented!("cache_key_callback must be implemented when caching is enabled")
    }

    /// This callback is invoked when a cacheable response is ready to be admitted to cache.
    fn cache_miss(&self, session: &mut Session, _ctx: &mut Self::CTX) {
        session.cache.cache_miss();
    }

    /// This filter is called after a successful cache lookup and before the
    /// cache asset is ready to be used.
    ///
    /// This filter allows the user to log or force invalidate the asset, or
    /// to adjust the body reader associated with the cache hit.
    /// This also runs on stale hit assets (for which `is_fresh` is false).
    ///
    /// The value returned indicates if the force invalidation should be used,
    /// and which kind. Returning `None` indicates no forced invalidation
    async fn cache_hit_filter(
        &self,
        _session: &mut Session,
        _meta: &CacheMeta,
        _hit_handler: &mut HitHandler,
        _is_fresh: bool,
        _ctx: &mut Self::CTX,
    ) -> Result<Option<ForcedFreshness>>
    where
        Self::CTX: Send + Sync,
    {
        Ok(None)
    }

    /// Decide if a request should continue to upstream after not being served from cache.
    ///
    /// returns: Ok(true) if the request should continue, Ok(false) if a response was written by the
    /// callback and the session should be finished, or an error
    ///
    /// This filter can be used for deferring checks like rate limiting or access control to when they
    /// actually needed after cache miss.
    ///
    /// By default the session will attempt to be reused after returning Ok(false). It is the
    /// caller's responsibility to disable keepalive or drain the request body if needed.
    async fn proxy_upstream_filter(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<bool>
    where
        Self::CTX: Send + Sync,
    {
        Ok(true)
    }

    /// Decide if the response is cacheable
    fn response_cache_filter(
        &self,
        _session: &Session,
        _resp: &ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<RespCacheable> {
        Ok(Uncacheable(NoCacheReason::Custom("default")))
    }

    /// Decide how to generate cache vary key from both request and response
    ///
    /// None means no variance is needed.
    fn cache_vary_filter(
        &self,
        _meta: &CacheMeta,
        _ctx: &mut Self::CTX,
        _req: &RequestHeader,
    ) -> Option<HashBinary> {
        // default to None for now to disable vary feature
        None
    }

    /// Decide if the incoming request's condition _fails_ against the cached response.
    ///
    /// Returning `Ok(true)` means that the response does _not_ match against the condition, and
    /// that the proxy can return `304 Not Modified` downstream.
    ///
    /// An example is a conditional GET request with `If-None-Match: "foobar"`. If the cached
    /// response contains the `ETag: "foobar"`, then the condition fails, and `304 Not Modified`
    /// should be returned. Else, the condition passes which means the full `200 OK` response must
    /// be sent.
    fn cache_not_modified_filter(
        &self,
        session: &Session,
        resp: &ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<bool> {
        Ok(
            pingora_core::protocols::http::conditional_filter::not_modified_filter(
                session.req_header(),
                resp,
            ),
        )
    }

    /// This filter is called when cache is enabled to determine what byte range to return (in both
    /// cache hit and miss cases) from the response body. It is only used when caching is enabled,
    /// otherwise the upstream is responsible for any filtering. It allows users to define the range
    /// this request is for via its return type `range_filter::RangeType`.
    ///
    /// It also allow users to modify the response header accordingly.
    ///
    /// The default implementation can handle a single-range as per [RFC7232].
    ///
    /// [RFC7232]: https://www.rfc-editor.org/rfc/rfc7232
    fn range_header_filter(
        &self,
        session: &mut Session,
        resp: &mut ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> range_filter::RangeType {
        const DEFAULT_MAX_RANGES: Option<usize> = Some(200);
        proxy_cache::range_filter::range_header_filter(
            session.req_header(),
            resp,
            DEFAULT_MAX_RANGES,
        )
    }

    /// Modify the request before it is sent to the upstream
    ///
    /// Unlike [Self::request_filter()], this filter allows to change the request headers to send
    /// to the upstream.
    async fn upstream_request_filter(
        &self,
        _session: &mut Session,
        _upstream_request: &mut RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    /// Modify the response header from the upstream
    ///
    /// The modification is before caching, so any change here will be stored in the cache if enabled.
    ///
    /// Responses served from cache won't trigger this filter. If the cache needed revalidation,
    /// only the 304 from upstream will trigger the filter (though it will be merged into the
    /// cached header, not served directly to downstream).
    async fn upstream_response_filter(
        &self,
        _session: &mut Session,
        _upstream_response: &mut ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    /// Modify the response header before it is send to the downstream
    ///
    /// The modification is after caching. This filter is called for all responses including
    /// responses served from cache.
    async fn response_filter(
        &self,
        _session: &mut Session,
        _upstream_response: &mut ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    // custom_forwarding is called when downstream and upstream connections are successfully established.
    #[doc(hidden)]
    async fn custom_forwarding(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
        _custom_message_to_upstream: Option<mpsc::Sender<Bytes>>,
        _custom_message_to_downstream: mpsc::Sender<Bytes>,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    // received a custom message from the downstream before sending it to the upstream.
    #[doc(hidden)]
    async fn downstream_custom_message_proxy_filter(
        &self,
        _session: &mut Session,
        custom_message: Bytes,
        _ctx: &mut Self::CTX,
        _final_hop: bool,
    ) -> Result<Option<Bytes>>
    where
        Self::CTX: Send + Sync,
    {
        Ok(Some(custom_message))
    }

    // received a custom message from the upstream before sending it to the downstream.
    #[doc(hidden)]
    async fn upstream_custom_message_proxy_filter(
        &self,
        _session: &mut Session,
        custom_message: Bytes,
        _ctx: &mut Self::CTX,
        _final_hop: bool,
    ) -> Result<Option<Bytes>>
    where
        Self::CTX: Send + Sync,
    {
        Ok(Some(custom_message))
    }

    /// Similar to [Self::upstream_response_filter()] but for response body
    ///
    /// This function will be called every time a piece of response body is received. The `body` is
    /// **not the entire response body**.
    fn upstream_response_body_filter(
        &self,
        _session: &mut Session,
        _body: &mut Option<Bytes>,
        _end_of_stream: bool,
        _ctx: &mut Self::CTX,
    ) -> Result<Option<Duration>> {
        Ok(None)
    }

    /// Similar to [Self::upstream_response_filter()] but for response trailers
    fn upstream_response_trailer_filter(
        &self,
        _session: &mut Session,
        _upstream_trailers: &mut header::HeaderMap,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        Ok(())
    }

    /// Similar to [Self::response_filter()] but for response body chunks
    fn response_body_filter(
        &self,
        _session: &mut Session,
        _body: &mut Option<Bytes>,
        _end_of_stream: bool,
        _ctx: &mut Self::CTX,
    ) -> Result<Option<Duration>>
    where
        Self::CTX: Send + Sync,
    {
        Ok(None)
    }

    /// Similar to [Self::response_filter()] but for response trailers.
    /// Note, returning an Ok(Some(Bytes)) will result in the downstream response
    /// trailers being written to the response body.
    ///
    /// TODO: make this interface more intuitive
    async fn response_trailer_filter(
        &self,
        _session: &mut Session,
        _upstream_trailers: &mut header::HeaderMap,
        _ctx: &mut Self::CTX,
    ) -> Result<Option<Bytes>>
    where
        Self::CTX: Send + Sync,
    {
        Ok(None)
    }

    /// This filter is called when the entire response is sent to the downstream successfully or
    /// there is a fatal error that terminate the request.
    ///
    /// An error log is already emitted if there is any error. This phase is used for collecting
    /// metrics and sending access logs.
    async fn logging(&self, _session: &mut Session, _e: Option<&Error>, _ctx: &mut Self::CTX)
    where
        Self::CTX: Send + Sync,
    {
    }

    /// A value of true means that the log message will be suppressed. The default value is false.
    fn suppress_error_log(&self, _session: &Session, _ctx: &Self::CTX, _error: &Error) -> bool {
        false
    }

    /// This filter is called when there is an error **after** a connection is established (or reused)
    /// to the upstream.
    fn error_while_proxy(
        &self,
        peer: &HttpPeer,
        session: &mut Session,
        e: Box<Error>,
        _ctx: &mut Self::CTX,
        client_reused: bool,
    ) -> Box<Error> {
        let mut e = e.more_context(format!("Peer: {}", peer));
        // only reused client connections where retry buffer is not truncated
        e.retry
            .decide_reuse(client_reused && !session.as_ref().retry_buffer_truncated());
        e
    }

    /// This filter is called when there is an error in the process of establishing a connection
    /// to the upstream.
    ///
    /// In this filter the user can decide whether the error is retry-able by marking the error `e`.
    ///
    /// If the error can be retried, [Self::upstream_peer()] will be called again so that the user
    /// can decide whether to send the request to the same upstream or another upstream that is possibly
    /// available.
    fn fail_to_connect(
        &self,
        _session: &mut Session,
        _peer: &HttpPeer,
        _ctx: &mut Self::CTX,
        e: Box<Error>,
    ) -> Box<Error> {
        e
    }

    /// This filter is called when the request encounters a fatal error.
    ///
    /// Users may write an error response to the downstream if the downstream is still writable.
    ///
    /// The response status code of the error response may be returned for logging purposes.
    /// Additionally, the user can return whether this session may be reused in spite of the error.
    /// Today this reuse status is only respected for errors that occur prior to upstream peer
    /// selection, and the keepalive configured on the `Session` itself still takes precedent.
    async fn fail_to_proxy(
        &self,
        session: &mut Session,
        e: &Error,
        _ctx: &mut Self::CTX,
    ) -> FailToProxy
    where
        Self::CTX: Send + Sync,
    {
        let code = match e.etype() {
            HTTPStatus(code) => *code,
            _ => {
                match e.esource() {
                    ErrorSource::Upstream => 502,
                    ErrorSource::Downstream => {
                        match e.etype() {
                            WriteError | ReadError | ConnectionClosed => {
                                /* conn already dead */
                                0
                            }
                            _ => 400,
                        }
                    }
                    ErrorSource::Internal | ErrorSource::Unset => 500,
                }
            }
        };
        if code > 0 {
            session.respond_error(code).await.unwrap_or_else(|e| {
                error!("failed to send error response to downstream: {e}");
            });
        }

        FailToProxy {
            error_code: code,
            // default to no reuse, which is safest
            can_reuse_downstream: false,
        }
    }

    /// Decide whether should serve stale when encountering an error or during revalidation
    ///
    /// An implementation should follow
    /// <https://datatracker.ietf.org/doc/html/rfc9111#section-4.2.4>
    /// <https://www.rfc-editor.org/rfc/rfc5861#section-4>
    ///
    /// This filter is only called if cache is enabled.
    // 5xx HTTP status will be encoded as ErrorType::HTTPStatus(code)
    fn should_serve_stale(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
        error: Option<&Error>, // None when it is called during stale while revalidate
    ) -> bool {
        // A cache MUST NOT generate a stale response unless
        // it is disconnected
        // or doing so is explicitly permitted by the client or origin server
        // (e.g. headers or an out-of-band contract)
        error.is_some_and(|e| e.esource() == &ErrorSource::Upstream)
    }

    /// This filter is called when the request just established or reused a connection to the upstream
    ///
    /// This filter allows user to log timing and connection related info.
    async fn connected_to_upstream(
        &self,
        _session: &mut Session,
        _reused: bool,
        _peer: &HttpPeer,
        #[cfg(unix)] _fd: std::os::unix::io::RawFd,
        #[cfg(windows)] _sock: std::os::windows::io::RawSocket,
        _digest: Option<&Digest>,
        _ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        Ok(())
    }

    /// This callback is invoked every time request related error log needs to be generated
    ///
    /// Users can define what is important to be written about this request via the returned string.
    fn request_summary(&self, session: &Session, _ctx: &Self::CTX) -> String {
        session.as_ref().request_summary()
    }

    /// Whether the request should be used to invalidate(delete) the HTTP cache
    ///
    /// - `true`: this request will be used to invalidate the cache.
    /// - `false`: this request is a treated as a normal request
    fn is_purge(&self, _session: &Session, _ctx: &Self::CTX) -> bool {
        false
    }

    /// This filter is called after the proxy cache generates the downstream response to the purge
    /// request (to invalidate or delete from the HTTP cache), based on the purge status, which
    /// indicates whether the request succeeded or failed.
    ///
    /// The filter allows the user to modify or replace the generated downstream response.
    /// If the filter returns `Err`, the proxy will instead send a 500 response.
    fn purge_response_filter(
        &self,
        _session: &Session,
        _ctx: &mut Self::CTX,
        _purge_status: PurgeStatus,
        _purge_response: &mut std::borrow::Cow<'static, ResponseHeader>,
    ) -> Result<()> {
        Ok(())
    }
}

/// Context struct returned by `fail_to_proxy`.
pub struct FailToProxy {
    pub error_code: u16,
    pub can_reuse_downstream: bool,
}


================================================
FILE: pingora-proxy/src/subrequest/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use bytes::Bytes;
use pingora_cache::lock::{CacheKeyLockImpl, LockStatus, WritePermit};
use pingora_cache::CacheKey;
use pingora_core::protocols::http::subrequest::server::{
    HttpSession as SessionSubrequest, SubrequestHandle,
};
use std::any::Any;

pub mod pipe;

struct LockCtx {
    write_permit: WritePermit,
    cache_lock: &'static CacheKeyLockImpl,
    key: CacheKey,
}

// Thin wrapper to allow iterating over InputBody Vec.
pub(crate) struct InputBodyReader(std::vec::IntoIter<Bytes>);

impl InputBodyReader {
    pub fn read_body(&mut self) -> Option<Bytes> {
        self.0.next()
    }
}

/// Optional user-defined subrequest context.
pub type UserCtx = Box<dyn Any + Sync + Send>;

#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub enum BodyMode {
    /// No body to be sent for subrequest.
    #[default]
    NoBody,
    /// Waiting on body if needed.
    ExpectBody,
}

#[derive(Default)]
pub struct CtxBuilder {
    lock: Option<LockCtx>,
    body_mode: BodyMode,
    user_ctx: Option<UserCtx>,
}

impl CtxBuilder {
    pub fn new() -> Self {
        Self {
            lock: None,
            body_mode: BodyMode::NoBody,
            user_ctx: None,
        }
    }

    pub fn cache_write_lock(
        mut self,
        cache_lock: &'static CacheKeyLockImpl,
        key: CacheKey,
        write_permit: WritePermit,
    ) -> Self {
        self.lock = Some(LockCtx {
            cache_lock,
            key,
            write_permit,
        });
        self
    }

    pub fn user_ctx(mut self, user_ctx: UserCtx) -> Self {
        self.user_ctx = Some(user_ctx);
        self
    }

    pub fn body_mode(mut self, body_mode: BodyMode) -> Self {
        self.body_mode = body_mode;
        self
    }

    pub fn build(self) -> Ctx {
        Ctx {
            lock: self.lock,
            body_mode: self.body_mode,
            user_ctx: self.user_ctx,
        }
    }
}

/// Context struct to share state across the parent and sub-request.
pub struct Ctx {
    body_mode: BodyMode,
    lock: Option<LockCtx>,
    // User-defined custom context.
    user_ctx: Option<UserCtx>,
}

impl Ctx {
    /// Create a [`CtxBuilder`] in order to make a new subrequest `Ctx`.
    pub fn builder() -> CtxBuilder {
        CtxBuilder::new()
    }

    /// Get a reference to the extensions inside this subrequest.
    pub fn user_ctx(&self) -> Option<&UserCtx> {
        self.user_ctx.as_ref()
    }

    /// Get a mutable reference to the extensions inside this subrequest.
    pub fn user_ctx_mut(&mut self) -> Option<&mut UserCtx> {
        self.user_ctx.as_mut()
    }

    /// Release the write lock from the subrequest (to clean up a write permit
    /// that will not be used in the cache key lock).
    pub fn release_write_lock(&mut self) {
        if let Some(lock) = self.lock.take() {
            // If we are releasing the write lock in the subrequest,
            // it means that the cache did not take it for whatever reason.
            // TransientError will cause the election of a new writer
            lock.cache_lock
                .release(&lock.key, lock.write_permit, LockStatus::TransientError);
        }
    }

    /// Take the write lock from the subrequest, for use in a cache key lock.
    pub fn take_write_lock(&mut self) -> Option<WritePermit> {
        // also clear out lock ctx
        self.lock.take().map(|lock| lock.write_permit)
    }

    /// Get the `BodyMode` when this subrequest was created.
    pub fn body_mode(&self) -> BodyMode {
        self.body_mode
    }
}

use crate::HttpSession;

pub(crate) fn create_session(parsed_session: &HttpSession) -> (HttpSession, SubrequestHandle) {
    let (session, handle) = SessionSubrequest::new_from_session(parsed_session);
    (HttpSession::new_subrequest(session), handle)
}

#[tokio::test]
async fn test_dummy_request() {
    use tokio_test::io::Builder;

    let input = b"GET / HTTP/1.1\r\n\r\n";
    let mock_io = Builder::new().read(&input[..]).build();
    let mut req = HttpSession::new_http1(Box::new(mock_io));
    req.read_request().await.unwrap();
    assert_eq!(input.as_slice(), req.to_h1_raw());

    let (mut subreq, _handle) = create_session(&req);
    subreq.read_request().await.unwrap();
    assert_eq!(input.as_slice(), subreq.to_h1_raw());
}


================================================
FILE: pingora-proxy/src/subrequest/pipe.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Subrequest piping.
//!
//! Along with subrequests themselves, subrequest piping as a feature is in
//! alpha stages, APIs are highly unstable and subject to change at any point.
//!
//! Unlike proxy_*, it is not a "true" proxy mode; the functions here help
//! establish a pipe between the main downstream session and the subrequest (which
//! in most cases will be used as a downstream session itself).
//!
//! Furthermore, only downstream modules are invoked on the main downstream session,
//! and the ProxyHttp trait filters are not run on the HttpTasks from the main session
//! (the only relevant one being the request body filter).

use crate::proxy_common::{DownstreamStateMachine, ResponseStateMachine};
use crate::subrequest::*;
use crate::{PreparedSubrequest, Session};
use bytes::Bytes;
use futures::FutureExt;
use log::{debug, warn};
use pingora_core::protocols::http::{subrequest::server::SubrequestHandle, HttpTask};
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use tokio::sync::mpsc;

pub enum InputBodyType {
    /// Preset body
    Preset(InputBody),
    /// Body should be saved (up to limit)
    SaveBody(usize),
}

/// Context struct as a result of subrequest piping.
#[derive(Clone)]
pub struct PipeSubrequestState {
    /// The saved (captured) body from the main session.
    pub saved_body: Option<SavedBody>,
}

impl PipeSubrequestState {
    fn new() -> PipeSubrequestState {
        PipeSubrequestState { saved_body: None }
    }
}

pub struct PipeSubrequestError {
    pub state: PipeSubrequestState,
    /// Whether error originated (and was propagated from) subrequest itself
    /// (vs. an error that occurred while sending task)
    pub from_subreq: bool,
    pub error: Box<Error>,
}
impl PipeSubrequestError {
    pub fn new(
        error: impl Into<Box<Error>>,
        from_subreq: bool,
        state: PipeSubrequestState,
    ) -> Self {
        PipeSubrequestError {
            error: error.into(),
            from_subreq,
            state,
        }
    }
}

fn map_pipe_err<T, E: Into<Box<Error>>>(
    result: Result<T, E>,
    from_subreq: bool,
    state: &PipeSubrequestState,
) -> Result<T, PipeSubrequestError> {
    result.map_err(|e| PipeSubrequestError::new(e, from_subreq, state.clone()))
}

#[derive(Debug, Clone)]
pub struct SavedBody {
    body: Vec<Bytes>,
    complete: bool,
    truncated: bool,
    length: usize,
    max_length: usize,
}

impl SavedBody {
    pub fn new(max_length: usize) -> Self {
        SavedBody {
            body: vec![],
            complete: false,
            truncated: false,
            length: 0,
            max_length,
        }
    }

    pub fn save_body_bytes(&mut self, body_bytes: Bytes) -> bool {
        let len = body_bytes.len();
        if self.length + len > self.max_length {
            self.truncated = true;
            return false;
        }
        self.length += len;
        self.body.push(body_bytes);
        true
    }

    pub fn is_body_complete(&self) -> bool {
        self.complete && !self.truncated
    }

    pub fn set_body_complete(&mut self) {
        self.complete = true;
    }
}

#[derive(Debug, Clone)]
pub enum InputBody {
    NoBody,
    Bytes(Vec<Bytes>),
    // TODO: stream
}

impl InputBody {
    pub(crate) fn into_reader(self) -> InputBodyReader {
        InputBodyReader(match self {
            InputBody::NoBody => vec![].into_iter(),
            InputBody::Bytes(v) => v.into_iter(),
        })
    }

    pub fn is_body_empty(&self) -> bool {
        match self {
            InputBody::NoBody => true,
            InputBody::Bytes(v) => v.is_empty(),
        }
    }
}

impl std::convert::From<SavedBody> for InputBody {
    fn from(body: SavedBody) -> Self {
        if body.body.is_empty() {
            InputBody::NoBody
        } else {
            InputBody::Bytes(body.body)
        }
    }
}

pub async fn pipe_subrequest<F>(
    session: &mut Session,
    mut subrequest: PreparedSubrequest,
    subrequest_handle: SubrequestHandle,
    mut task_filter: F,
    input_body: InputBodyType,
) -> std::result::Result<PipeSubrequestState, PipeSubrequestError>
where
    F: FnMut(HttpTask) -> Result<Option<HttpTask>>,
{
    let (maybe_preset_body, saved_body) = match input_body {
        InputBodyType::Preset(body) => (Some(body), None),
        InputBodyType::SaveBody(limit) => (None, Some(SavedBody::new(limit))),
    };
    let use_preset_body = maybe_preset_body.is_some();

    let mut response_state = ResponseStateMachine::new();
    let (no_body_input, mut maybe_preset_reader) = if use_preset_body {
        let preset_body = maybe_preset_body.expect("checked above");
        (preset_body.is_body_empty(), Some(preset_body.into_reader()))
    } else {
        (session.as_mut().is_body_done(), None)
    };
    let mut downstream_state = DownstreamStateMachine::new(no_body_input);

    let mut state = PipeSubrequestState::new();
    state.saved_body = saved_body;

    // Have the subrequest remove all body-related headers if no body will be sent
    // TODO: we could also await the join handle, but subrequest may be running logging phase
    // also the full run() may also await cache fill if downstream fails
    let _join_handle = tokio::spawn(async move {
        if no_body_input {
            subrequest
                .session_mut()
                .as_subrequest_mut()
                .expect("PreparedSubrequest must be subrequest")
                .clear_request_body_headers();
        }
        subrequest.run().await
    });
    let tx = subrequest_handle.tx;
    let mut rx = subrequest_handle.rx;

    let mut wants_body = false;
    let mut wants_body_rx_err = false;
    let mut wants_body_rx = subrequest_handle.subreq_wants_body;

    let mut proxy_error_rx_err = false;
    let mut proxy_error_rx = subrequest_handle.subreq_proxy_error;

    // Note: "upstream" here refers to subrequest session tasks,
    // downstream refers to main session
    while !downstream_state.is_done() || !response_state.is_done() {
        let send_permit = tx
            .try_reserve()
            .or_err(InternalError, "try_reserve() body pipe for subrequest");

        tokio::select! {
            task = rx.recv(), if !response_state.upstream_done() => {
                debug!("upstream event: {:?}", task);
                if let Some(t) = task {
                    // pull as many tasks as we can
                    const TASK_BUFFER_SIZE: usize = 4;
                    let mut tasks = Vec::with_capacity(TASK_BUFFER_SIZE);
                    let task = map_pipe_err(task_filter(t), false, &state)?;
                    if let Some(filtered) = task {
                        tasks.push(filtered);
                    }
                    // tokio::task::unconstrained because now_or_never may yield None when the future is ready
                    while let Some(maybe_task) = tokio::task::unconstrained(rx.recv()).now_or_never() {
                        if let Some(t) = maybe_task {
                            let task = map_pipe_err(task_filter(t), false, &state)?;
                            if let Some(filtered) = task {
                                tasks.push(filtered);
                            }
                        } else {
                            break
                        }
                    }
                    // FIXME: if one of these tasks is Failed(e), the session will return that
                    // error; in this case, the error is actually from the subreq
                    let response_done = map_pipe_err(session.write_response_tasks(tasks).await, false, &state)?;

                    // NOTE: technically it is the downstream whose response state has finished here
                    // we consider the subrequest's work done however
                    response_state.maybe_set_upstream_done(response_done);
                    // unsuccessful upgrade response may force the request done
                    // (can only happen with a real session, TODO to allow with preset body)
                    downstream_state.maybe_finished(!use_preset_body && session.is_body_done());
                } else {
                    // quite possible that the subrequest may be finished, though the main session
                    // is not - we still must exit in this case
                    debug!("empty upstream event");
                    response_state.maybe_set_upstream_done(true);
                }
            },

            res = &mut wants_body_rx, if !wants_body && !wants_body_rx_err => {
                // subrequest may need time before it needs body, or it may not actually require it
                // TODO: tx send permit may not be necessary if no oneshot exists
                if res.is_err() {
                    wants_body_rx_err = true;
                } else {
                    wants_body = true;
                }
            }

            res = &mut proxy_error_rx, if !proxy_error_rx_err => {
                if let Ok(e) = res {
                    // propagate proxy error to caller
                    return Err(PipeSubrequestError::new(e, true, state));
                } else {
                    // subrequest dropped, let select loop finish
                    proxy_error_rx_err = true;
                }
            }

            _ = tx.reserve(), if downstream_state.is_reading() && send_permit.is_err() => {
                // If tx is closed, the upstream has already finished its job.
                downstream_state.maybe_finished(tx.is_closed());
                debug!("waiting for permit {send_permit:?}, upstream closed {}", tx.is_closed());
                /* No permit, wait on more capacity to avoid starving.
                 * Otherwise this select only blocks on rx, which might send no data
                 * before the entire body is uploaded.
                 * once more capacity arrives we just loop back
                 */
            },

            body = session.downstream_session.read_body_or_idle(downstream_state.is_done()),
                if wants_body && !use_preset_body && downstream_state.can_poll() && send_permit.is_ok() => {
                // this is the first subrequest
                // send the body
                debug!("downstream event: main body for subrequest");
                let body = map_pipe_err(body.map_err(|e| e.into_down()), false, &state)?;

                // If the request is websocket, `None` body means the request is closed.
                // Set the response to be done as well so that the request completes normally.
                if body.is_none() && session.is_upgrade_req() {
                    response_state.maybe_set_upstream_done(true);
                }

                let is_body_done = session.is_body_done();
                let request_done = map_pipe_err(send_body_to_pipe(
                    session,
                    body,
                    is_body_done,
                    state.saved_body.as_mut(),
                    send_permit.expect("checked is_ok()"),
                )
                .await, false, &state)?;

                downstream_state.maybe_finished(request_done);

            },

            // lazily evaluated async block allows us to expect() inside the select! branch
            body = async { maybe_preset_reader.as_mut().expect("preset body set").read_body() },
                if wants_body && use_preset_body && !downstream_state.is_done() && downstream_state.can_poll() && send_permit.is_ok() => {
                debug!("downstream event: preset body for subrequest");

                // TODO: WebSocket handling to set upstream done?

                // preset None body indicates we are done
                let is_body_done = body.is_none();
                // Don't run downstream modules on preset input body
                let request_done = map_pipe_err(do_send_body_to_pipe(
                    body,
                    is_body_done,
                    None,
                    send_permit.expect("checked is_ok()"),
                ), false, &state)?;
                downstream_state.maybe_finished(request_done);

            },

            else => break,
        }
    }
    Ok(state)
}

// Mostly the same as proxy_common, but does not run proxy request_body_filter
async fn send_body_to_pipe(
    session: &mut Session,
    mut data: Option<Bytes>,
    end_of_body: bool,
    saved_body: Option<&mut SavedBody>,
    tx: mpsc::Permit<'_, HttpTask>,
) -> Result<bool> {
    // None: end of body
    // this var is to signal if downstream finish sending the body, which shouldn't be
    // affected by the request_body_filter
    let end_of_body = end_of_body || data.is_none();

    session
        .downstream_modules_ctx
        .request_body_filter(&mut data, end_of_body)
        .await?;

    do_send_body_to_pipe(data, end_of_body, saved_body, tx)
}

fn do_send_body_to_pipe(
    data: Option<Bytes>,
    end_of_body: bool,
    mut saved_body: Option<&mut SavedBody>,
    tx: mpsc::Permit<'_, HttpTask>,
) -> Result<bool> {
    // the flag to signal to upstream
    let upstream_end_of_body = end_of_body || data.is_none();

    /* It is normal to get 0 bytes because of multi-chunk or request_body_filter decides not to
     * output anything yet.
     * Don't write 0 bytes to the network since it will be
     * treated as the terminating chunk */
    if !upstream_end_of_body && data.as_ref().is_some_and(|d| d.is_empty()) {
        return Ok(false);
    }

    debug!(
        "Read {} bytes body from downstream",
        data.as_ref().map_or(-1, |d| d.len() as isize)
    );

    if let Some(capture) = saved_body.as_mut() {
        if capture.is_body_complete() {
            warn!("subrequest trying to save body after body is complete");
        } else if let Some(d) = data.as_ref() {
            capture.save_body_bytes(d.clone());
        }
        if end_of_body {
            capture.set_body_complete();
        }
    }

    tx.send(HttpTask::Body(data, upstream_end_of_body));

    Ok(end_of_body)
}


================================================
FILE: pingora-proxy/tests/keys/key.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIN5lAOvtlKwtc/LR8/U77dohJmZS30OuezU9gL6vmm6DoAoGCCqGSM49
AwEHoUQDQgAE2f/1Fm1HjySdokPq2T0F1xxol9nSEYQ+foFINeaWYk+FxMGpriJT
Bb8AGka87cWklw1ZqytfaT6pkureDbTkwg==
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/keys/public.pem
================================================
-----BEGIN PUBLIC KEY-----
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE2f/1Fm1HjySdokPq2T0F1xxol9nS
EYQ+foFINeaWYk+FxMGpriJTBb8AGka87cWklw1ZqytfaT6pkureDbTkwg==
-----END PUBLIC KEY-----


================================================
FILE: pingora-proxy/tests/keys/server.crt
================================================
-----BEGIN CERTIFICATE-----
MIIB9zCCAZ2gAwIBAgIUMI7aLvTxyRFCHhw57hGt4U6yupcwCgYIKoZIzj0EAwIw
ZDELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNp
c2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxFjAUBgNVBAMMDW9wZW5ydXN0
eS5vcmcwHhcNMjIwNDExMjExMzEzWhcNMzIwNDA4MjExMzEzWjBkMQswCQYDVQQG
EwJVUzELMAkGA1UECAwCQ0ExFjAUBgNVBAcMDVNhbiBGcmFuY2lzY28xGDAWBgNV
BAoMD0Nsb3VkZmxhcmUsIEluYzEWMBQGA1UEAwwNb3BlbnJ1c3R5Lm9yZzBZMBMG
ByqGSM49AgEGCCqGSM49AwEHA0IABNn/9RZtR48knaJD6tk9BdccaJfZ0hGEPn6B
SDXmlmJPhcTBqa4iUwW/ABpGvO3FpJcNWasrX2k+qZLq3g205MKjLTArMCkGA1Ud
EQQiMCCCDyoub3BlbnJ1c3R5Lm9yZ4INb3BlbnJ1c3R5Lm9yZzAKBggqhkjOPQQD
AgNIADBFAiAjISZ9aEKmobKGlT76idO740J6jPaX/hOrm41MLeg69AIhAJqKrSyz
wD/AAF5fR6tXmBqlnpQOmtxfdy13wDr4MT3h
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/keys/server.csr
================================================
-----BEGIN CERTIFICATE REQUEST-----
MIIBJzCBzgIBADBsMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEW
MBQGA1UEBwwNU2FuIEZyYW5jaXNjbzEYMBYGA1UECgwPQ2xvdWRmbGFyZSwgSW5j
MRYwFAYDVQQDDA1vcGVucnVzdHkub3JnMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcD
QgAE2f/1Fm1HjySdokPq2T0F1xxol9nSEYQ+foFINeaWYk+FxMGpriJTBb8AGka8
7cWklw1ZqytfaT6pkureDbTkwqAAMAoGCCqGSM49BAMCA0gAMEUCIFyDN8eamnoY
XydKn2oI7qImigxahyCftzjxkIEV5IKbAiEAo5l72X4U+YTVYmyPPnJIj2v5nA1R
RuUfMh5sXzwlwuM=
-----END CERTIFICATE REQUEST-----


================================================
FILE: pingora-proxy/tests/pingora_conf.yaml
================================================
---
version: 1
client_bind_to_ipv4:
    - 127.0.0.2
ca_file: tests/keys/server.crt

================================================
FILE: pingora-proxy/tests/test_basic.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod utils;

use bytes::Bytes;
use h2::client;
use http::Request;
use hyper::{body::HttpBody, header::HeaderValue, Body, Client};
#[cfg(unix)]
use hyperlocal::{UnixClientExt, Uri};
use reqwest::{header, StatusCode};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::{TcpListener, TcpStream};

use utils::server_utils::init;

fn is_specified_port(port: u16) -> bool {
    (1..65535).contains(&port)
}

#[tokio::test]
async fn test_origin_alive() {
    init();
    let res = reqwest::get("http://127.0.0.1:8000/").await.unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");
}

#[tokio::test]
async fn test_simple_proxy() {
    init();
    let res = reqwest::get("http://127.0.0.1:6147").await.unwrap();
    assert_eq!(res.status(), StatusCode::OK);

    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    assert_eq!(headers["x-server-addr"], "127.0.0.1:6147");
    let sockaddr = headers["x-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.1");
    assert!(is_specified_port(sockaddr.port()));

    assert_eq!(headers["x-upstream-server-addr"], "127.0.0.1:8000");
    let sockaddr = headers["x-upstream-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.2");
    assert!(is_specified_port(sockaddr.port()));

    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");
}

#[tokio::test]
#[cfg(feature = "any_tls")]
async fn test_h2_to_h1() {
    init();
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .build()
        .unwrap();

    let res = client
        .get("https://127.0.0.1:6150")
        .header("sni", "openrusty.org")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);

    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    assert_eq!(headers["x-server-addr"], "127.0.0.1:6150");

    let sockaddr = headers["x-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.1");
    assert!(is_specified_port(sockaddr.port()));

    assert_eq!(headers["x-upstream-server-addr"], "127.0.0.1:8443");
    let sockaddr = headers["x-upstream-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.2");
    assert!(is_specified_port(sockaddr.port()));

    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");
}

#[tokio::test]
#[cfg(feature = "any_tls")]
async fn test_h2_to_h2() {
    init();
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .build()
        .unwrap();

    let res = client
        .get("https://127.0.0.1:6150")
        .header("sni", "openrusty.org")
        .header("x-h2", "true")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);

    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    assert_eq!(headers["x-server-addr"], "127.0.0.1:6150");
    let sockaddr = headers["x-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.1");
    assert!(is_specified_port(sockaddr.port()));

    assert_eq!(headers["x-upstream-server-addr"], "127.0.0.1:8443");
    let sockaddr = headers["x-upstream-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.2");
    assert!(is_specified_port(sockaddr.port()));

    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");
}

#[tokio::test]
async fn test_h2c_to_h2c() {
    init();

    let client = hyper::client::Client::builder()
        .http2_only(true)
        .build_http();

    let mut req = hyper::Request::builder()
        .uri("http://127.0.0.1:6146")
        .body(Body::empty())
        .unwrap();
    req.headers_mut()
        .insert("x-h2", HeaderValue::from_bytes(b"true").unwrap());
    let res = client.request(req).await.unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);

    let body = res.into_body().data().await.unwrap().unwrap();
    assert_eq!(body.as_ref(), b"Hello World!\n");
}

#[tokio::test]
async fn test_h1_on_h2c_port() {
    init();

    let client = hyper::client::Client::builder()
        .http2_only(false)
        .build_http();

    let mut req = hyper::Request::builder()
        .uri("http://127.0.0.1:6146")
        .body(Body::empty())
        .unwrap();
    req.headers_mut()
        .insert("x-h2", HeaderValue::from_bytes(b"true").unwrap());
    let res = client.request(req).await.unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_11);

    let body = res.into_body().data().await.unwrap().unwrap();
    assert_eq!(body.as_ref(), b"Hello World!\n");
}

#[tokio::test]
#[cfg(feature = "openssl_derived")]
async fn test_h2_to_h2_host_override() {
    init();
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .build()
        .unwrap();

    let res = client
        .get("https://127.0.0.1:6150")
        .header("x-h2", "true")
        .header("host-override", "test.com")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);
    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");
}

#[tokio::test]
#[cfg(feature = "any_tls")]
async fn test_h2_to_h2_upload() {
    init();
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .build()
        .unwrap();

    let payload = "test upload";

    let res = client
        .get("https://127.0.0.1:6150/echo")
        .header("sni", "openrusty.org")
        .header("x-h2", "true")
        .body(payload)
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);
    let body = res.text().await.unwrap();
    assert_eq!(body, payload);
}

#[tokio::test]
#[cfg(feature = "any_tls")]
async fn test_h2_to_h1_upload() {
    init();
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .build()
        .unwrap();

    let payload = "test upload";

    let res = client
        .get("https://127.0.0.1:6150/echo")
        .header("sni", "openrusty.org")
        .body(payload)
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);
    let body = res.text().await.unwrap();
    assert_eq!(body, payload);
}

#[tokio::test]
#[cfg(feature = "any_tls")]
async fn test_h2_head() {
    init();
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .build()
        .unwrap();

    let res = client
        .head("https://127.0.0.1:6150/set_content_length")
        .header("sni", "openrusty.org")
        .header("x-h2", "true")
        .header("x-set-content-length", "11")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), reqwest::StatusCode::OK);
    assert_eq!(res.version(), reqwest::Version::HTTP_2);
    let body = res.text().await.unwrap();
    // should not be any body, despite content-length
    assert_eq!(body, "");
}

#[cfg(unix)]
#[tokio::test]
async fn test_simple_proxy_uds() {
    init();
    let url = Uri::new("/tmp/pingora_proxy.sock", "/").into();
    let client = Client::unix();

    let res = client.get(url).await.unwrap();

    assert_eq!(res.status(), reqwest::StatusCode::OK);
    let (resp, body) = res.into_parts();

    let headers = &resp.headers;
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    assert_eq!(headers["x-server-addr"], "/tmp/pingora_proxy.sock");
    assert_eq!(headers["x-client-addr"], "unset"); // unnamed UDS

    assert_eq!(headers["x-upstream-server-addr"], "127.0.0.1:8000");
    let sockaddr = headers["x-upstream-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.2");
    assert!(is_specified_port(sockaddr.port()));

    let body = hyper::body::to_bytes(body).await.unwrap();
    assert_eq!(body.as_ref(), b"Hello World!\n");
}

#[cfg(unix)]
#[tokio::test]
async fn test_simple_proxy_uds_peer() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6147")
        .header("x-uds-peer", "1") // force upstream peer to be UDS
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);

    let headers = &res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    assert_eq!(headers["x-server-addr"], "127.0.0.1:6147");
    let sockaddr = headers["x-client-addr"]
        .to_str()
        .unwrap()
        .parse::<std::net::SocketAddr>()
        .unwrap();
    assert_eq!(sockaddr.ip().to_string(), "127.0.0.1");
    assert!(is_specified_port(sockaddr.port()));

    assert_eq!(headers["x-upstream-client-addr"], "unset"); // unnamed UDS
    assert_eq!(
        headers["x-upstream-server-addr"],
        "/tmp/pingora_nginx_test.sock"
    );

    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");
}

async fn test_dropped_conn_get() {
    init();
    let client = reqwest::Client::new();
    let port = "8001"; // special port to avoid unexpected connection reuse from other tests

    for _ in 1..3 {
        // load conns into pool
        let res = client
            .get("http://127.0.0.1:6147")
            .header("x-port", port)
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
    }

    let res = client
        .get("http://127.0.0.1:6147/bad_lb")
        .header("x-port", port)
        .send()
        .await
        .unwrap();

    // retry gives 200
    assert_eq!(res.status(), StatusCode::OK);
    let body = res.text().await.unwrap();
    assert_eq!(body, "dog!\n");
}

async fn test_dropped_conn_post_empty_body() {
    init();
    let client = reqwest::Client::new();
    let port = "8001"; // special port to avoid unexpected connection reuse from other tests

    for _ in 1..3 {
        // load conn into pool
        let res = client
            .get("http://127.0.0.1:6147")
            .header("x-port", port)
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
    }

    let res = client
        .post("http://127.0.0.1:6147/bad_lb")
        .header("x-port", port)
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
    let body = res.text().await.unwrap();
    assert_eq!(body, "dog!\n");
}

async fn test_dropped_conn_post_body() {
    init();
    let client = reqwest::Client::new();
    let port = "8001"; // special port to avoid unexpected connection reuse from other tests

    for _ in 1..3 {
        // load conn into pool
        let res = client
            .get("http://127.0.0.1:6147")
            .header("x-port", port)
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
    }

    let res = client
        .post("http://127.0.0.1:6147/bad_lb")
        .header("x-port", port)
        .body("cat!")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
    let body = res.text().await.unwrap();
    assert_eq!(body, "cat!\n");
}

async fn test_dropped_conn_post_body_over() {
    init();
    let client = reqwest::Client::new();
    let port = "8001"; // special port to avoid unexpected connection reuse from other tests
    let large_body = String::from_utf8(vec![b'e'; 1024 * 64 + 1]).unwrap();

    for _ in 1..3 {
        // load conn into pool
        let res = client
            .get("http://127.0.0.1:6147")
            .header("x-port", port)
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
    }

    let res = client
        .post("http://127.0.0.1:6147/bad_lb")
        .header("x-port", port)
        .body(large_body)
        .send()
        .await
        .unwrap();

    // 502, body larger than buffer limit
    assert_eq!(res.status(), StatusCode::from_u16(502).unwrap());
}

#[tokio::test]
async fn test_dropped_conn() {
    // These tests can race with each other
    // So force run them sequentially
    test_dropped_conn_get().await;
    test_dropped_conn_post_empty_body().await;
    test_dropped_conn_post_body().await;
    test_dropped_conn_post_body_over().await;
}

// currently not supported with Rustls implementation
#[cfg(feature = "openssl_derived")]
#[tokio::test]
async fn test_tls_no_verify() {
    init();
    let client = reqwest::Client::new();
    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_verify_sni_not_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("sni", "openrusty.org")
        .header("verify", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
}

// currently not supported with Rustls implementation
#[cfg(feature = "openssl_derived")]
#[tokio::test]
async fn test_tls_none_verify_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_verify_sni_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("sni", "openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_underscore_sub_sni_verify_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("sni", "d_g.openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_underscore_non_sub_sni_verify_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("sni", "open_rusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::BAD_GATEWAY);
    let headers = res.headers();
    assert_eq!(headers[header::CONNECTION], "close");
}

#[cfg(feature = "openssl_derived")]
#[tokio::test]
async fn test_tls_alt_verify_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("sni", "open_rusty.org")
        .header("alt", "openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "openssl_derived")]
#[tokio::test]
async fn test_tls_underscore_sub_alt_verify_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("sni", "open_rusty.org")
        .header("alt", "d_g.openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_underscore_non_sub_alt_verify_host() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("sni", "open_rusty.org")
        .header("alt", "open_rusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    assert_eq!(res.status(), StatusCode::BAD_GATEWAY);
}

#[tokio::test]
async fn test_upstream_compression() {
    init();

    // disable reqwest gzip support to check compression headers and body
    // otherwise reqwest will decompress and strip the headers
    let client = reqwest::ClientBuilder::new().gzip(false).build().unwrap();
    let res = client
        .get("http://127.0.0.1:6147/no_compression")
        .header("accept-encoding", "gzip")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    assert_eq!(res.headers().get("Content-Encoding").unwrap(), "gzip");
    let body = res.bytes().await.unwrap();
    assert!(body.len() < 32);

    // Next let reqwest decompress to validate the data
    let client = reqwest::ClientBuilder::new().gzip(true).build().unwrap();
    let res = client
        .get("http://127.0.0.1:6147/no_compression")
        .header("accept-encoding", "gzip")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let body = res.bytes().await.unwrap();
    assert_eq!(body.as_ref(), &[b'B'; 32]);
}

#[tokio::test]
async fn test_downstream_compression() {
    init();

    // disable reqwest gzip support to check compression headers and body
    // otherwise reqwest will decompress and strip the headers
    let client = reqwest::ClientBuilder::new().gzip(false).build().unwrap();
    let res = client
        .get("http://127.0.0.1:6147/no_compression")
        // tell the test proxy to use downstream compression module instead of upstream
        .header("x-downstream-compression", "1")
        .header("accept-encoding", "gzip")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    assert_eq!(res.headers().get("Content-Encoding").unwrap(), "gzip");
    let body = res.bytes().await.unwrap();
    assert!(body.len() < 32);

    // Next let reqwest decompress to validate the data
    let client = reqwest::ClientBuilder::new().gzip(true).build().unwrap();
    let res = client
        .get("http://127.0.0.1:6147/no_compression")
        .header("accept-encoding", "gzip")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let body = res.bytes().await.unwrap();
    assert_eq!(body.as_ref(), &[b'B'; 32]);
}

#[tokio::test]
async fn test_connect_close() {
    init();

    // default keep-alive
    let client = reqwest::ClientBuilder::new().build().unwrap();
    let res = client.get("http://127.0.0.1:6147").send().await.unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    assert_eq!(headers[header::CONNECTION], "keep-alive");
    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");

    // close
    let client = reqwest::ClientBuilder::new().build().unwrap();
    let res = client
        .get("http://127.0.0.1:6147")
        .header("connection", "close")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "13");
    assert_eq!(headers[header::CONNECTION], "close");
    let body = res.text().await.unwrap();
    assert_eq!(body, "Hello World!\n");
}

#[tokio::test]
async fn test_connect_proxying_disallowed_h1() {
    init();

    let mut stream = TcpStream::connect("127.0.0.1:6147").await.unwrap();
    let request = b"CONNECT pingora.org:443 HTTP/1.1\r\nHost: pingora.org:443\r\n\r\n";
    stream.write_all(request).await.unwrap();

    let mut buf = [0u8; 1024];
    let read = stream.read(&mut buf).await.unwrap();
    let resp = std::str::from_utf8(&buf[..read]).unwrap();
    let status_line = resp.lines().next().unwrap_or("");
    assert!(status_line.contains(" 405 "));
}

#[tokio::test]
async fn test_connect_proxying_disallowed_h2() {
    init();

    let tcp = TcpStream::connect("127.0.0.1:6146").await.unwrap();
    let (mut h2, connection) = client::handshake(tcp).await.unwrap();
    tokio::spawn(async move {
        connection.await.unwrap();
    });

    let request = Request::builder()
        .method("CONNECT")
        .uri("http://pingora.org:443/")
        .body(())
        .unwrap();
    let (response, _body) = h2.send_request(request, true).unwrap();
    let (head, mut body) = response.await.unwrap().into_parts();
    assert_eq!(head.status.as_u16(), 405);
    while let Some(chunk) = body.data().await {
        assert!(chunk.unwrap().is_empty());
    }
}

#[tokio::test]
async fn test_connect_proxying_allowed_h1() {
    init();

    let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
    let upstream_addr = listener.local_addr().unwrap();

    // Note per RFC CONNECT 2xx responses are not allowed to have response
    // bodies, so this is non-standard behavior.
    tokio::spawn(async move {
        let (mut socket, _) = listener.accept().await.unwrap();
        let mut buf = [0u8; 1024];
        let _ = socket.read(&mut buf).await.unwrap();
        let response = b"HTTP/1.1 200 OK\r\nContent-Length: 2\r\n\r\nok";
        socket.write_all(response).await.unwrap();
        let _ = socket.shutdown().await;
    });

    let mut stream = TcpStream::connect("127.0.0.1:6160").await.unwrap();
    let request = format!(
        "CONNECT pingora.org:443 HTTP/1.1\r\nHost: pingora.org:443\r\nX-Port: {}\r\n\r\n",
        upstream_addr.port()
    );
    stream.write_all(request.as_bytes()).await.unwrap();

    let mut buf = vec![0u8; 1024];
    let read = stream.read(&mut buf).await.unwrap();
    let resp = std::str::from_utf8(&buf[..read]).unwrap();
    let status_line = resp.lines().next().unwrap_or("");
    assert!(status_line.contains(" 200 "));
    assert!(resp.ends_with("ok"));
}

#[tokio::test]
#[cfg(feature = "any_tls")]
async fn test_mtls_no_client_cert() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("x-port", "8444")
        .header("sni", "openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();

    // 400: because no cert
    assert_eq!(res.status(), StatusCode::BAD_REQUEST);
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_mtls_no_intermediate_cert() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/tls_verify")
        .header("x-port", "8444")
        .header("sni", "openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .header("client_cert", "1")
        .send()
        .await
        .unwrap();

    // 400: because no intermediate cert
    assert_eq!(res.status(), StatusCode::BAD_REQUEST);
}

#[tokio::test]
#[cfg(feature = "any_tls")]
async fn test_mtls() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/")
        .header("x-port", "8444")
        .header("sni", "openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .header("client_cert", "1")
        .header("client_intermediate", "1")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "any_tls")]
async fn assert_reuse(req: reqwest::RequestBuilder) {
    req.try_clone().unwrap().send().await.unwrap();
    let res = req.send().await.unwrap();
    let headers = res.headers();
    assert!(headers.get("x-conn-reuse").is_some());
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_mtls_diff_cert_no_reuse() {
    init();
    let client = reqwest::Client::new();

    let req = client
        .get("http://127.0.0.1:6149/")
        .header("x-port", "8444")
        .header("sni", "openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .header("client_cert", "1")
        .header("client_intermediate", "1");

    // pre check re-use
    assert_reuse(req).await;

    // different cert no re-use
    let res = client
        .get("http://127.0.0.1:6149/")
        .header("x-port", "8444")
        .header("sni", "openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1")
        .header("client_cert", "2")
        .header("client_intermediate", "1")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert!(headers.get("x-conn-reuse").is_none());
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_diff_verify_no_reuse() {
    init();
    let client = reqwest::Client::new();

    let req = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "dog.openrusty.org")
        .header("verify", "1");

    // pre check re-use
    assert_reuse(req).await;

    // disable 'verify' no re-use
    let res = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "dog.openrusty.org")
        .header("verify", "0")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert!(headers.get("x-conn-reuse").is_none());
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_diff_verify_host_no_reuse() {
    init();
    let client = reqwest::Client::new();

    let req = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "cat.openrusty.org")
        .header("verify", "1")
        .header("verify_host", "1");

    // pre check re-use
    assert_reuse(req).await;

    // disable 'verify_host' no re-use
    let res = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "cat.openrusty.org")
        .header("verify", "1")
        .header("verify_host", "0")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert!(headers.get("x-conn-reuse").is_none());
}

#[cfg(feature = "any_tls")]
#[tokio::test]
async fn test_tls_diff_alt_cnt_no_reuse() {
    init();
    let client = reqwest::Client::new();

    let req = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "openrusty.org")
        .header("alt", "cat.com")
        .header("verify", "1")
        .header("verify_host", "1");

    // pre check re-use
    assert_reuse(req).await;

    // use alt-cn no reuse
    let res = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "openrusty.org")
        .header("alt", "dog.com")
        .header("verify", "1")
        .header("verify_host", "1")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert!(headers.get("x-conn-reuse").is_none());
}

#[cfg(feature = "s2n")]
#[tokio::test]
async fn test_tls_psk() {
    use crate::utils::server_utils::TEST_PSK_IDENTITY;

    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "openrusty.org")
        .header("psk_identity", TEST_PSK_IDENTITY)
        .header("x-port", "6151")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
}

#[cfg(feature = "s2n")]
#[tokio::test]
async fn test_tls_psk_invalid() {
    init();
    let client = reqwest::Client::new();

    let res = client
        .get("http://127.0.0.1:6149/")
        .header("sni", "openrusty.org")
        .header("psk_identity", "BAD_IDENTITY")
        .header("x-port", "6151")
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::BAD_GATEWAY);
}

#[tokio::test]
async fn test_error_before_headers_sent() {
    init();
    let url = "http://127.0.0.1:6146/sleep/test_error_before_headers_sent.txt";

    let tcp = TcpStream::connect("127.0.0.1:6146").await.unwrap();
    let (mut client, h2) = client::handshake(tcp).await.unwrap();

    tokio::spawn(async move {
        h2.await.unwrap();
    });

    let request = Request::builder()
        .uri(url)
        .header("x-set-sleep", "0")
        .header("x-abort", "true")
        .body(())
        .unwrap();

    let (response, mut _stream) = client.send_request(request, true).unwrap();

    let response = response.await.unwrap();
    let mut body = response.into_body();

    while let Some(chunk) = body.data().await {
        assert_eq!(chunk.unwrap(), Bytes::new());
    }
}

#[tokio::test]
async fn test_error_after_headers_sent_rst_received() {
    init();
    let url = "http://127.0.0.1:6146/connection_die/test_error_after_headers_sent_rst_received.txt";

    let tcp = TcpStream::connect("127.0.0.1:6146").await.unwrap();
    let (mut client, h2) = client::handshake(tcp).await.unwrap();

    tokio::spawn(async move {
        h2.await.unwrap();
    });

    let request = Request::builder().uri(url).body(()).unwrap();

    let (response, mut _stream) = client.send_request(request, true).unwrap();

    let response = response.await.unwrap();
    let mut body = response.into_body();

    let chunk = body.data().await.unwrap();
    assert_eq!(chunk.unwrap(), Bytes::from_static(b"AAAAA"));

    let err = body.data().await.unwrap().err().unwrap();
    assert_eq!(err.reason().unwrap(), h2::Reason::CANCEL);
}

#[tokio::test]
async fn test_103() {
    init();
    let res = reqwest::get("http://127.0.0.1:6147/103").await.unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert_eq!(headers[header::CONTENT_LENGTH], "6");
    let body = res.text().await.unwrap();
    assert_eq!(body, "123456");
}

#[tokio::test]
async fn test_103_die() {
    init();
    let res = reqwest::get("http://127.0.0.1:6147/103-die").await.unwrap();
    assert_eq!(res.status(), StatusCode::BAD_GATEWAY);
}


================================================
FILE: pingora-proxy/tests/test_upstream.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

mod utils;

use utils::server_utils::init;
use utils::websocket::{WS_ECHO, WS_ECHO_RAW};

use futures::{SinkExt, StreamExt};
use pingora_http::ResponseHeader;
use reqwest::header::{HeaderName, HeaderValue};
use reqwest::{StatusCode, Version};
use std::time::{Duration, Instant};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpStream;
use tokio::time::timeout;
use tokio_tungstenite::tungstenite::{client::IntoClientRequest, Message};

#[tokio::test]
async fn test_ip_binding() {
    init();
    let res = reqwest::get("http://127.0.0.1:6147/client_ip")
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert_eq!(headers["x-client-ip"], "127.0.0.2");
}

#[tokio::test]
async fn test_duplex() {
    init();
    // NOTE: this doesn't really verify that we are in full duplex mode as reqwest
    // won't allow us control when req body is sent
    let client = reqwest::Client::new();
    let res = client
        .post("http://127.0.0.1:6147/duplex/")
        .body("b".repeat(1024 * 1024)) // 1 MB upload
        .timeout(Duration::from_secs(5))
        .send()
        .await
        .unwrap();
    let headers = res.headers();
    assert_eq!(headers["Connection"], "keep-alive");
    assert_eq!(res.status(), StatusCode::OK);
    let body = res.text().await.unwrap();
    assert_eq!(body.len(), 64 * 5);
}

#[tokio::test]
async fn test_connection_die() {
    init();
    let res = reqwest::get("http://127.0.0.1:6147/connection_die")
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let body = res.text().await;
    // reqwest doesn't allow us to inspect the partial body
    assert!(body.is_err());
}

#[tokio::test]
async fn test_upload_connection_die() {
    init();
    let client = reqwest::Client::new();
    let res = client
        .post("http://127.0.0.1:6147/upload_connection_die/")
        .body("b".repeat(15 * 1024 * 1024)) // 15 MB upload
        .timeout(Duration::from_secs(5))
        .send()
        .await
        .unwrap();
    // should get 200 status before connection dies
    assert_eq!(res.status(), StatusCode::OK);
    let _ = res.text().await;

    // try h2
    let client = reqwest::Client::new();
    let res = client
        .post("http://127.0.0.1:6147/upload_connection_die/")
        .body("b".repeat(15 * 1024 * 1024)) // 15 MB upload
        .timeout(Duration::from_secs(5))
        .header("x-h2", "true")
        .send()
        .await
        .unwrap();
    // should get 200 status before connection dies
    assert_eq!(res.status(), StatusCode::OK);
    let _ = res.text().await;
}

#[tokio::test]
async fn test_upload() {
    init();
    let client = reqwest::Client::new();
    let res = client
        .post("http://127.0.0.1:6147/upload/")
        .body("b".repeat(15 * 1024 * 1024)) // 15 MB upload
        .timeout(Duration::from_secs(5))
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let body = res.text().await.unwrap();
    assert_eq!(body.len(), 64 * 5);
}

#[tokio::test]
async fn test_close_on_response_before_downstream_finish() {
    init();
    let client = reqwest::Client::new();
    let res = client
        .post("http://127.0.0.1:6147/test2")
        .header("x-close-on-response-before-downstream-finish", "1")
        .body("b".repeat(15 * 1024 * 1024)) // 15 MB upload
        .timeout(Duration::from_secs(5))
        .send()
        .await
        .unwrap();
    assert_eq!(res.status(), StatusCode::OK);
    let headers = res.headers();
    assert_eq!(headers["Connection"], "close");
    let body = res.text().await.unwrap();
    assert_eq!(body.len(), 11);
}

#[tokio::test]
async fn test_ws_server_ends_conn() {
    init();
    let _ = *WS_ECHO;

    // server gracefully closes connection

    let mut req = "ws://127.0.0.1:6147".into_client_request().unwrap();
    req.headers_mut()
        .insert("x-port", HeaderValue::from_static("9283"));

    let (mut ws_stream, _) = tokio_tungstenite::connect_async(req).await.unwrap();
    // gracefully close connection
    ws_stream.send("test".into()).await.unwrap();
    ws_stream.next().await.unwrap().unwrap();
    ws_stream.send("graceful".into()).await.unwrap();
    let msg = ws_stream.next().await.unwrap().unwrap();
    // assert graceful close
    assert!(matches!(msg, Message::Close(None)));
    // test may hang here if downstream doesn't close when upstream does
    assert!(ws_stream.next().await.is_none());

    // server abruptly closes connection

    let mut req = "ws://127.0.0.1:6147".into_client_request().unwrap();
    req.headers_mut()
        .insert("x-port", HeaderValue::from_static("9283"));

    let (mut ws_stream, _) = tokio_tungstenite::connect_async(req).await.unwrap();
    // abrupt close connection
    ws_stream.send("close".into()).await.unwrap();
    // test will hang here if downstream doesn't close when upstream does
    assert!(ws_stream.next().await.unwrap().is_err());

    // client gracefully closes connection

    let mut req = "ws://127.0.0.1:6147".into_client_request().unwrap();
    req.headers_mut()
        .insert("x-port", HeaderValue::from_static("9283"));

    let (mut ws_stream, _) = tokio_tungstenite::connect_async(req).await.unwrap();
    ws_stream.send("test".into()).await.unwrap();
    // sender initiates close
    ws_stream.close(None).await.unwrap();
    let msg = ws_stream.next().await.unwrap().unwrap();
    // assert echo
    assert_eq!("test", msg.into_text().unwrap());
    let msg = ws_stream.next().await.unwrap().unwrap();
    // assert graceful close
    assert!(matches!(msg, Message::Close(None)));
    assert!(ws_stream.next().await.is_none());
}

fn parse_response_header(buf: &[u8]) -> ResponseHeader {
    let mut headers = vec![httparse::EMPTY_HEADER; 256];
    let mut parsed = httparse::Response::new(&mut headers);
    match parsed.parse(buf).unwrap() {
        httparse::Status::Complete(_) => {
            let mut resp =
                ResponseHeader::build(parsed.code.unwrap(), Some(parsed.headers.len())).unwrap();
            for header in parsed.headers.iter() {
                resp.append_header(header.name.to_string(), header.value)
                    .unwrap();
            }
            resp
        }
        _ => panic!("expects a whole response header"),
    }
}

/// Read response header and return it along with any preread body data
async fn read_response_header(stream: &mut tokio::net::TcpStream) -> (ResponseHeader, Vec<u8>) {
    let mut response = vec![];
    let mut header_end = 0;
    let mut buf = [0; 1024];
    loop {
        let n = stream.read(&mut buf).await.unwrap();
        response.extend_from_slice(&buf[..n]);
        let mut end_of_response = false;
        for (i, w) in response.windows(4).enumerate() {
            if w == b"\r\n\r\n" {
                end_of_response = true;
                header_end = i + 4;
                break;
            }
        }
        if end_of_response {
            break;
        }
    }
    let response_header = parse_response_header(&response[..header_end]);
    let preread_body = response[header_end..].to_vec();
    (response_header, preread_body)
}

/// Read remaining body bytes from stream until expected_body_len is reached
async fn read_response_body(
    stream: &mut tokio::net::TcpStream,
    mut body: Vec<u8>,
    expected_body_len: usize,
) -> Vec<u8> {
    let mut buf = [0; 1024];
    while body.len() < expected_body_len {
        let n = stream.read(&mut buf).await.unwrap();
        body.extend_from_slice(&buf[..n]);
    }
    if body.len() > expected_body_len {
        panic!("more body bytes than expected");
    }
    body
}

async fn read_response(
    stream: &mut tokio::net::TcpStream,
    expected_body_len: usize,
) -> (ResponseHeader, Vec<u8>) {
    let (response_header, body) = read_response_header(stream).await;
    let body = read_response_body(stream, body, expected_body_len).await;
    (response_header, body)
}

#[tokio::test]
async fn test_upgrade_smoke() {
    init();

    let mut stream = TcpStream::connect("127.0.0.1:6147").await.unwrap();

    let req = concat!(
        "GET /upgrade HTTP/1.1\r\n",
        "Host: 127.0.0.1\r\n",
        "Upgrade: websocket\r\n",
        "Connection: Upgrade\r\n",
        "\r\n"
    );
    stream.write_all(req.as_bytes()).await.unwrap();
    stream.flush().await.unwrap();

    let expected_payload = b"hello\n";
    let fut = read_response(&mut stream, expected_payload.len());
    let (resp_header, resp_body) = timeout(Duration::from_secs(5), fut).await.unwrap();

    assert_eq!(resp_header.status, 101);
    assert_eq!(resp_header.headers["Upgrade"], "websocket");
    assert_eq!(resp_header.headers["Connection"], "upgrade");
    assert_eq!(resp_body, expected_payload);
}

#[tokio::test]
async fn test_upgrade_body() {
    init();

    let mut stream = TcpStream::connect("127.0.0.1:6147").await.unwrap();

    let req = concat!(
        "POST /upgrade_echo_body HTTP/1.1\r\n",
        "Host: 127.0.0.1\r\n",
        "Upgrade: websocket\r\n",
        "Connection: Upgrade\r\n",
        "Content-Length: 1024\r\n",
        "\r\n"
    );
    stream.write_all(req.as_bytes()).await.unwrap();
    stream.flush().await.unwrap();
    stream.write_all("b".repeat(1024).as_bytes()).await.unwrap();
    stream.flush().await.unwrap();

    let fut = read_response(&mut stream, 1024);
    let (resp_header, resp_body) = timeout(Duration::from_secs(5), fut).await.unwrap();
    assert_eq!(resp_header.status, 101);
    assert_eq!(resp_header.headers["Upgrade"], "websocket");
    assert_eq!(resp_header.headers["Connection"], "upgrade");

    let body = "b".repeat(1024);
    assert_eq!(resp_body, body.as_bytes());
}

#[tokio::test]
async fn test_upgrade_body_after_101() {
    // test content-length body is passed through after 101,
    // and that ws payload is passed through afterwards
    // use websocket server that flushes 101 after reading header
    init();
    let _ = *WS_ECHO_RAW;

    let mut stream = TcpStream::connect("127.0.0.1:6147").await.unwrap();

    let req = concat!(
        "POST /upgrade_echo_body HTTP/1.1\r\n",
        "Host: 127.0.0.1\r\n",
        "Upgrade: websocket\r\n",
        "Connection: Upgrade\r\n",
        "X-Port: 9284\r\n",
        "Content-Length: 5120\r\n",
        "X-Expected-Body-Len: 5125\r\n", // include ws payload
        "\r\n"
    );
    stream.write_all(req.as_bytes()).await.unwrap();
    stream.flush().await.unwrap();
    stream
        .write_all("b".repeat(5 * 1024).as_bytes())
        .await
        .unwrap();
    stream.flush().await.unwrap();

    // Read response header and any preread body first (before sending ws_payload)
    let fut = read_response_header(&mut stream);
    let (resp_header, resp_body) = timeout(Duration::from_secs(5), fut).await.unwrap();
    assert_eq!(resp_header.status, 101);
    assert_eq!(resp_header.headers["Upgrade"], "websocket");
    assert_eq!(resp_header.headers["Connection"], "upgrade");

    // Now send the websocket payload after receiving 101
    let ws_payload = "hello";
    stream.write_all(ws_payload.as_bytes()).await.unwrap();
    stream.flush().await.unwrap();

    // Read the rest of the bytes (body + ws payload), subtracting preread body length
    let expected_total_len = 5 * 1024 + ws_payload.len();
    let fut = read_response_body(&mut stream, resp_body, expected_total_len);
    let resp_body = timeout(Duration::from_secs(5), fut).await.unwrap();

    let body = "b".repeat(5 * 1024) + ws_payload;
    assert_eq!(resp_body, body.as_bytes());
}

#[tokio::test]
async fn test_download_timeout() {
    init();
    use hyper::body::HttpBody;
    use tokio::time::sleep;

    let client = hyper::Client::new();
    let uri: hyper::Uri = "http://127.0.0.1:6147/download_large/".parse().unwrap();
    let req = hyper::Request::builder()
        .uri(uri)
        .header("x-write-timeout", "1")
        .body(hyper::Body::empty())
        .unwrap();
    let mut res = client.request(req).await.unwrap();
    assert_eq!(res.status(), StatusCode::OK);

    let mut err = false;
    sleep(Duration::from_secs(2)).await;
    while let Some(chunk) = res.body_mut().data().await {
        if chunk.is_err() {
            err = true;
        }
    }
    assert!(err);
}

#[tokio::test]
async fn test_download_timeout_min_rate() {
    init();
    use hyper::body::HttpBody;
    use tokio::time::sleep;

    let client = hyper::Client::new();
    let uri: hyper::Uri = "http://127.0.0.1:6147/download/".parse().unwrap();
    let req = hyper::Request::builder()
        .uri(uri)
        .header("x-write-timeout", "1")
        .header("x-min-rate", "10000")
        .body(hyper::Body::empty())
        .unwrap();
    let mut res = client.request(req).await.unwrap();
    assert_eq!(res.status(), StatusCode::OK);

    let mut err = false;
    sleep(Duration::from_secs(2)).await;
    while let Some(chunk) = res.body_mut().data().await {
        if chunk.is_err() {
            err = true;
        }
    }
    // no error as write timeout is overridden by min rate
    assert!(!err);
}

mod test_cache {
    use super::*;
    use std::str::FromStr;
    use tokio::time::sleep;

    #[tokio::test]
    async fn test_basic_caching() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_basic_caching/now";

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_expired_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "expired");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert!(cache_expired_epoch > cache_hit_epoch);
    }

    #[tokio::test]
    async fn test_purge() {
        init();
        let res = reqwest::get("http://127.0.0.1:6148/unique/test_purge/test2")
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::get("http://127.0.0.1:6148/unique/test_purge/test2")
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::Client::builder()
            .build()
            .unwrap()
            .request(
                reqwest::Method::from_bytes(b"PURGE").unwrap(),
                "http://127.0.0.1:6148/unique/test_purge/test2",
            )
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        assert_eq!(res.text().await.unwrap(), "");

        let res = reqwest::Client::builder()
            .build()
            .unwrap()
            .request(
                reqwest::Method::from_bytes(b"PURGE").unwrap(),
                "http://127.0.0.1:6148/unique/test_purge/test2",
            )
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_FOUND);
        assert_eq!(res.text().await.unwrap(), "");

        let res = reqwest::get("http://127.0.0.1:6148/unique/test_purge/test2")
            .await
            .unwrap();
        let headers = res.headers();
        assert_eq!(res.status(), StatusCode::OK);
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_cache_miss_convert() {
        init();

        // test if-* header is stripped
        let client = reqwest::Client::new();
        let res = client
            .get("http://127.0.0.1:6148/unique/test_cache_miss_convert/no_if_headers")
            .header("if-modified-since", "Wed, 19 Jan 2022 18:39:12 GMT")
            .send()
            .await
            .unwrap();
        // 200 because last-modified not returned from upstream
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "no if headers detected\n");

        // test range header is stripped
        let client = reqwest::Client::new();
        let res = client
            .get("http://127.0.0.1:6148/unique/test_cache_miss_convert2/no_if_headers")
            .header("Range", "bytes=0-1")
            .send()
            .await
            .unwrap();
        // we have not implemented downstream range yet, it should be 206 once we have it
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "no if headers detected\n");
    }

    #[tokio::test]
    async fn test_cache_http10() {
        // allow caching http1.0 from origin, but proxy as h1.1 downstream
        init();
        let url = "http://127.0.0.1:6148/unique/test_cache_http10/now";

        let res = reqwest::Client::new()
            .get(url)
            .header("x-upstream-fake-http10", "1") // fake http1.0 in upstream response filter
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        assert_eq!(res.version(), Version::HTTP_11);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["transfer-encoding"], "chunked");
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::Client::new()
            .get(url)
            .header("x-upstream-fake-http10", "1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        assert_eq!(res.version(), Version::HTTP_11);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["transfer-encoding"], "chunked");
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::Client::new()
            .get(url)
            .header("x-upstream-fake-http10", "1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        assert_eq!(res.version(), Version::HTTP_11);
        let headers = res.headers();
        let cache_expired_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["transfer-encoding"], "chunked");
        assert_eq!(headers["x-cache-status"], "expired");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert!(cache_expired_epoch > cache_hit_epoch);
    }

    #[tokio::test]
    async fn test_cache_downstream_compression() {
        init();

        // disable reqwest gzip support to check compression headers and body
        // otherwise reqwest will decompress and strip the headers
        let client = reqwest::ClientBuilder::new().gzip(false).build().unwrap();
        let res = client
            .get("http://127.0.0.1:6148/unique/test_cache_downstream_compression/no_compression")
            .header("x-downstream-compression", "1")
            .header("accept-encoding", "gzip")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["Content-Encoding"], "gzip");
        assert_eq!(headers["x-cache-status"], "miss");
        let body = res.bytes().await.unwrap();
        assert!(body.len() < 32);

        // should also apply on hit
        let client = reqwest::ClientBuilder::new().gzip(false).build().unwrap();
        let res = client
            .get("http://127.0.0.1:6148/unique/test_cache_downstream_compression/no_compression")
            .header("x-downstream-compression", "1")
            .header("accept-encoding", "gzip")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["Content-Encoding"], "gzip");
        assert_eq!(headers["x-cache-status"], "hit");
        let body = res.bytes().await.unwrap();
        assert!(body.len() < 32);
    }

    #[tokio::test]
    async fn test_cache_downstream_decompression() {
        init();

        // disable reqwest gzip support to check compression headers and body
        // otherwise reqwest will decompress and strip the headers
        let client = reqwest::ClientBuilder::new().gzip(false).build().unwrap();
        let res = client
            .get("http://127.0.0.1:6148/unique/test_cache_downstream_decompression/gzip/index.html")
            .header("x-downstream-decompression", "1")
            .header("x-upstream-accept-encoding", "gzip")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        // upstream should have received gzip, should decompress for downstream
        assert_eq!(headers["received-accept-encoding"], "gzip");
        assert!(headers.get("Content-Encoding").is_none());
        assert_eq!(headers["x-cache-status"], "miss");
        let body = res.bytes().await.unwrap();
        assert_eq!(body, "Hello World!\n");

        // should also apply on hit
        let client = reqwest::ClientBuilder::new().gzip(false).build().unwrap();
        let res = client
            .get("http://127.0.0.1:6148/unique/test_cache_downstream_decompression/gzip/index.html")
            .header("x-downstream-decompression", "1")
            .header("x-upstream-accept-encoding", "gzip")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert!(headers.get("Content-Encoding").is_none());
        assert_eq!(headers["x-cache-status"], "hit");
        let body = res.bytes().await.unwrap();
        assert_eq!(body, "Hello World!\n");

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        // should also apply on revalidated
        let client = reqwest::ClientBuilder::new().gzip(false).build().unwrap();
        let res = client
            .get("http://127.0.0.1:6148/unique/test_cache_downstream_decompression/gzip/index.html")
            .header("x-downstream-decompression", "1")
            .header("x-upstream-accept-encoding", "gzip")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert!(headers.get("Content-Encoding").is_none());
        assert_eq!(headers["x-cache-status"], "revalidated");
        let body = res.bytes().await.unwrap();
        assert_eq!(body, "Hello World!\n");
    }

    #[tokio::test]
    async fn test_network_error_mid_response() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_network_error_mid_response.txt";

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep
            .header("x-set-body-sleep", "0.1") // pause the body a bit before abort
            .header("x-abort-body", "true") // this will tell origin to kill the conn right away
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        // the connection dies
        assert!(res.text().await.is_err());

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep
            .header("x-set-body-sleep", "0.1") // pause the body a bit before abort
            .header("x-abort-body", "true") // this will tell origin to kill the conn right away
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        // the connection dies
        assert!(res.text().await.is_err());
    }

    #[tokio::test]
    async fn test_cache_upstream_revalidation() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_upstream_revalidation/revalidate_now";

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["x-upstream-status"], "200");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert!(headers.get("x-upstream-status").is_none());
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_expired_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "revalidated");
        assert_eq!(headers["x-upstream-status"], "304");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // still the old object
        assert_eq!(cache_expired_epoch, cache_hit_epoch);
    }

    #[tokio::test]
    async fn test_cache_upstream_revalidation_appends_headers() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_cache_upstream_revalidation_appends_headers/cache_control";

        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["x-upstream-status"], "200");
        assert_eq!(headers["cache-control"], "public, max-age=1");
        assert_eq!(headers.get_all("cache-control").into_iter().count(), 1);
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert!(headers.get("x-upstream-status").is_none());
        assert_eq!(headers.get_all("cache-control").into_iter().count(), 1);
        assert_eq!(res.text().await.unwrap(), "hello world");

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=1")
            .header("set-cache-control", "stale-while-revalidate=86400")
            .header("set-revalidated", "1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "revalidated");
        assert_eq!(headers["x-upstream-status"], "304");
        let mut cc = headers.get_all("cache-control").into_iter();
        assert_eq!(cc.next().unwrap(), "public, max-age=1");
        assert_eq!(cc.next().unwrap(), "stale-while-revalidate=86400");
        assert!(cc.next().is_none());
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_force_miss() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_froce_miss/revalidate_now";

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["x-upstream-status"], "200");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert!(headers.get("x-upstream-status").is_none());
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        let res = reqwest::Client::new()
            .get(url)
            .header("x-force-miss", "1")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["x-upstream-status"], "200");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_force_miss_stale() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_froce_miss_stale/revalidate_now";

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["x-upstream-status"], "200");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert!(headers.get("x-upstream-status").is_none());
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        // stale, but can be forced miss
        let res = reqwest::Client::new()
            .get(url)
            .header("x-force-miss", "1")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["x-upstream-status"], "200");
        let cache_miss_epoch2 = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert!(cache_miss_epoch != cache_miss_epoch2);
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_force_fresh() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_force_fresh/revalidate_now";

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["x-upstream-status"], "200");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert!(headers.get("x-upstream-status").is_none());
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        // stale, but can be forced fresh
        let res = reqwest::Client::new()
            .get(url)
            .header("x-force-fresh", "1")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert!(!headers.contains_key("x-upstream-status"));
        let cache_miss_epoch2 = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(cache_miss_epoch, cache_miss_epoch2);
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_cache_downstream_revalidation_etag() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_downstream_revalidation_etag/revalidate_now";
        let client = reqwest::Client::new();

        // MISS + 304
        let res = client
            .get(url)
            .header("If-None-Match", "\"abcd\", \"foobar\"") // "abcd" is the fixed etag of this
            // endpoint
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), ""); // 304 no body

        // HIT + 304
        let res = client
            .get(url)
            .header("If-None-Match", "\"abcd\", \"foobar\"")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), ""); // 304 no body

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        // HIT + 200 (condition passed)
        let res = client
            .get(url)
            .header("If-None-Match", "\"foobar\"")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        // revalidated + 304
        let res = client
            .get(url)
            .header("If-None-Match", "\"abcd\", \"foobar\"")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        let cache_expired_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "revalidated");
        assert_eq!(res.text().await.unwrap(), ""); // 304 no body

        // still the old object
        assert_eq!(cache_expired_epoch, cache_hit_epoch);
    }

    #[tokio::test]
    async fn test_cache_downstream_revalidation_last_modified() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_downstream_revalidation_last_modified/revalidate_now";
        let client = reqwest::Client::new();

        // MISS + 304
        let res = client
            .get(url)
            .header("If-Modified-Since", "Tue, 03 May 2022 01:04:39 GMT") // fixed last-modified of
            // the endpoint
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), ""); // 304 no body

        // HIT + 304
        let res = client
            .get(url)
            .header("If-Modified-Since", "Tue, 03 May 2022 01:11:39 GMT")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), ""); // 304 no body

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        // HIT + 200 (condition passed)
        let res = client
            .get(url)
            .header("If-Modified-Since", "Tue, 03 May 2022 00:11:39 GMT")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        // revalidated + 304
        let res = client
            .get(url)
            .header("If-Modified-Since", "Tue, 03 May 2022 01:11:39 GMT")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        let cache_expired_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "revalidated");
        assert_eq!(res.text().await.unwrap(), ""); // 304 no body

        // still the old object
        assert_eq!(cache_expired_epoch, cache_hit_epoch);
    }

    #[tokio::test]
    async fn test_cache_downstream_head() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_downstream_head/revalidate_now";
        let client = reqwest::Client::new();

        // MISS + HEAD
        let res = client.head(url).send().await.unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), ""); // HEAD no body

        // HIT + HEAD
        let res = client.head(url).send().await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), ""); // HEAD no body

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        // revalidated + HEAD
        let res = client.head(url).send().await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_expired_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "revalidated");
        assert_eq!(res.text().await.unwrap(), ""); // HEAD no body

        // still the old object
        assert_eq!(cache_expired_epoch, cache_hit_epoch);
    }

    #[tokio::test]
    async fn test_purge_reject() {
        init();

        let res = reqwest::Client::builder()
            .build()
            .unwrap()
            .request(
                reqwest::Method::from_bytes(b"PURGE").unwrap(),
                "http://127.0.0.1:6148/",
            )
            .header("x-bypass-cache", "1") // not to cache this one
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::METHOD_NOT_ALLOWED);
        assert_eq!(res.text().await.unwrap(), "");
    }

    #[tokio::test]
    async fn test_cache_websocket_101() {
        // Test the unlikely scenario in which users may want to cache WS
        init();

        // First request - should be a miss
        let mut stream = TcpStream::connect("127.0.0.1:6148").await.unwrap();
        let req = concat!(
            "GET /unique/test_cache_websocket_101/upgrade HTTP/1.1\r\n",
            "Host: 127.0.0.1\r\n",
            "Upgrade: websocket\r\n",
            "Connection: Upgrade\r\n",
            "X-Cache-Websocket: 1\r\n",
            "\r\n"
        );
        stream.write_all(req.as_bytes()).await.unwrap();
        stream.flush().await.unwrap();

        let expected_payload = b"hello\n";
        let fut = read_response(&mut stream, expected_payload.len());
        let (resp_header, resp_body) = timeout(Duration::from_secs(5), fut).await.unwrap();

        assert_eq!(resp_header.status, 101);
        assert_eq!(resp_header.headers["Upgrade"], "websocket");
        assert_eq!(resp_header.headers["x-cache-status"], "miss");
        assert_eq!(resp_body, expected_payload);

        // Second request - should be a cache hit
        let mut stream = TcpStream::connect("127.0.0.1:6148").await.unwrap();
        let req = concat!(
            "GET /unique/test_cache_websocket_101/upgrade HTTP/1.1\r\n",
            "Host: 127.0.0.1\r\n",
            "Upgrade: websocket\r\n",
            "Connection: Upgrade\r\n",
            "X-Cache-Websocket: 1\r\n",
            "\r\n"
        );
        stream.write_all(req.as_bytes()).await.unwrap();
        stream.flush().await.unwrap();

        let fut = read_response(&mut stream, expected_payload.len());
        let (resp_header, resp_body) = timeout(Duration::from_secs(5), fut).await.unwrap();

        assert_eq!(resp_header.status, 101);
        assert_eq!(resp_header.headers["Upgrade"], "websocket");
        assert_eq!(resp_header.headers["x-cache-status"], "hit");
        assert_eq!(resp_body, expected_payload);
    }

    #[tokio::test]
    async fn test_1xx_caching() {
        // 1xx shouldn't interfere with HTTP caching

        // set up a one-off mock server
        // (warp / hyper don't have custom 1xx sending capabilities yet)
        async fn mock_1xx_server(port: u16, cc_header: &str) {
            use tokio::io::AsyncWriteExt;

            let listener = tokio::net::TcpListener::bind(format!("127.0.0.1:{}", port))
                .await
                .unwrap();
            if let Ok((mut stream, _addr)) = listener.accept().await {
                stream.write_all(b"HTTP/1.1 103 Early Hints\r\nLink: <https://foo.bar>; rel=preconnect\r\n\r\n").await.unwrap();
                // wait a bit so that the client can read
                sleep(Duration::from_millis(100)).await;
                stream.write_all(format!("HTTP/1.1 200 OK\r\nContent-Length: 5\r\nCache-Control: {}\r\n\r\nhello", cc_header).as_bytes()).await.unwrap();
                sleep(Duration::from_millis(100)).await;
            }
        }

        init();

        let url = "http://127.0.0.1:6148/unique/test_1xx_caching";

        tokio::spawn(async {
            mock_1xx_server(6151, "max-age=5").await;
        });
        // wait for server to start
        sleep(Duration::from_millis(100)).await;

        let client = reqwest::Client::new();
        let res = client
            .get(url)
            .header("x-port", "6151")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello");

        let res = client
            .get(url)
            .header("x-port", "6151")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello");

        // 1xx shouldn't interfere with bypass
        let url = "http://127.0.0.1:6148/unique/test_1xx_bypass";

        tokio::spawn(async {
            mock_1xx_server(6152, "private, no-store").await;
        });
        // wait for server to start
        sleep(Duration::from_millis(100)).await;

        let res = client
            .get(url)
            .header("x-port", "6152")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello");

        // restart the one-off server - still uncacheable
        sleep(Duration::from_millis(100)).await;
        tokio::spawn(async {
            mock_1xx_server(6152, "private, no-store").await;
        });
        // wait for server to start
        sleep(Duration::from_millis(100)).await;

        let res = client
            .get(url)
            .header("x-port", "6152")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello");
    }

    #[tokio::test]
    async fn test_bypassed_became_cacheable() {
        init();

        let url = "http://127.0.0.1:6148/unique/test_bypassed/cache_control";

        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "private, max-age=0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cc = headers.get("Cache-Control").unwrap();
        assert_eq!(cc, "private, max-age=0");
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // request should bypass cache, but became cacheable (cache fill)
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=10")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // HIT
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=10")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_bypassed_304() {
        init();

        let url = "http://127.0.0.1:6148/unique/test_bypassed_304/cache_control";

        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "private, max-age=0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cc = headers.get("Cache-Control").unwrap();
        assert_eq!(cc, "private, max-age=0");
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // cacheable without private cache-control
        // note this will be a 304 and not a 200, we will cache on _next_ request
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=10")
            .header("set-revalidated", "1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "deferred");

        // should be cache fill
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=10")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // HIT
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=10")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_bypassed_uncacheable_304() {
        init();

        let url = "http://127.0.0.1:6148/unique/test_bypassed_private_304/cache_control";

        // cache fill
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cc = headers.get("Cache-Control").unwrap();
        assert_eq!(cc, "public, max-age=0");
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // cache stale
        // upstream returns 304, but response became uncacheable
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "private")
            .header("set-revalidated", "1")
            .send()
            .await
            .unwrap();
        // should see the response body because we didn't send conditional headers
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "revalidated");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // we bypass cache for this next request
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=10")
            .header("set-revalidated", "1") // non-200 status to get bypass phase
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::NOT_MODIFIED);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "deferred");
    }

    #[tokio::test]
    async fn test_bypassed_head() {
        init();

        let url = "http://127.0.0.1:6148/unique/test_bypassed_head/cache_control";

        // uncacheable, should bypass
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "private, max-age=0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // we bypass cache for this next request, becomes cacheable
        let res = reqwest::Client::new()
            .head(url)
            .header("set-cache-control", "public, max-age=10")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        // should not cache the response
        assert_eq!(headers["x-cache-status"], "deferred");

        // MISS
        let res = reqwest::Client::new()
            .get(url)
            .header("set-cache-control", "public, max-age=10")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_eviction() {
        init();
        let url = "http://127.0.0.1:6148/file_maker/test_eviction".to_owned();

        // admit asset 1
        let res = reqwest::Client::new()
            .get(url.clone() + "1")
            .header("x-set-size", "3000")
            .header("x-eviction", "1") // tell test proxy to use eviction manager
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap().len(), 3000);

        // admit asset 2
        let res = reqwest::Client::new()
            .get(url.clone() + "2")
            .header("x-set-size", "3000")
            .header("x-eviction", "1") // tell test proxy to use eviction manager
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap().len(), 3000);

        // touch asset 2
        let res = reqwest::Client::new()
            .get(url.clone() + "2")
            .header("x-set-size", "3000")
            .header("x-eviction", "1") // tell test proxy to use eviction manager
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap().len(), 3000);

        // touch asset 1
        let res = reqwest::Client::new()
            .get(url.clone() + "1")
            .header("x-set-size", "3000")
            .header("x-eviction", "1") // tell test proxy to use eviction manager
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap().len(), 3000);

        // admit asset 3
        let res = reqwest::Client::new()
            .get(url.clone() + "3")
            .header("x-set-size", "6000")
            .header("x-eviction", "1") // tell test proxy to use eviction manager
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap().len(), 6000);

        // check asset 2, it should be evicted already because admitting asset 3 made it full
        let res = reqwest::Client::new()
            .get(url.clone() + "2")
            .header("x-set-size", "3000")
            .header("x-eviction", "1") // tell test proxy to use eviction manager
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss"); // evicted
        assert_eq!(res.text().await.unwrap().len(), 3000);
    }

    #[tokio::test]
    async fn test_cache_lock_miss_hit() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_lock_miss_hit.txt";

        // no lock, parallel fetches to a slow origin are all misses
        tokio::spawn(async move {
            let res = reqwest::Client::new().get(url).send().await.unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        tokio::spawn(async move {
            let res = reqwest::Client::new().get(url).send().await.unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        tokio::spawn(async move {
            let res = reqwest::Client::new().get(url).send().await.unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello world");
        })
        .await
        .unwrap(); // wait for at least one of them to finish

        let res = reqwest::Client::new().get(url).send().await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // try with lock
        let url = "http://127.0.0.1:6148/sleep/test_cache_lock_miss_hit2.txt";
        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;
        let task2 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "hit");
            let lock_time_ms: u32 = headers["x-cache-lock-time-ms"]
                .to_str()
                .unwrap()
                .parse()
                .unwrap();
            assert!(lock_time_ms > 900 && lock_time_ms < 1000);
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        let task3 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "hit");
            let lock_time_ms: u32 = headers["x-cache-lock-time-ms"]
                .to_str()
                .unwrap()
                .parse()
                .unwrap();
            assert!(lock_time_ms > 900 && lock_time_ms < 1000);
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        task1.await.unwrap();
        task2.await.unwrap();
        task3.await.unwrap();
    }

    #[tokio::test]
    async fn test_cache_lock_expired() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_lock_expired.txt";

        // cache one
        let res = reqwest::Client::new()
            .get(url)
            .header("x-no-stale-revalidate", "true")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");
        // let it stale
        sleep(Duration::from_secs(1)).await;

        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-no-stale-revalidate", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "expired");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;
        let task2 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-no-stale-revalidate", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "hit");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        let task3 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-no-stale-revalidate", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "hit");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        task1.await.unwrap();
        task2.await.unwrap();
        task3.await.unwrap();
    }

    #[tokio::test]
    async fn test_cache_lock_network_error() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_lock_network_error.txt";

        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-set-sleep", "0.3") // sometimes we hit the retry logic which is x3 slow
                .header("x-abort", "true") // this will tell origin to kill the conn right away
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), 502); // error happened
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;

        let task2 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            let status = headers["x-cache-status"].to_owned();
            assert_eq!(res.text().await.unwrap(), "hello world");
            status
        });
        let task3 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            let status = headers["x-cache-status"].to_owned();
            assert_eq!(res.text().await.unwrap(), "hello world");
            status
        });

        task1.await.unwrap();
        let status2 = task2.await.unwrap();
        let status3 = task3.await.unwrap();

        let mut count_miss = 0;
        if status2 == "miss" {
            count_miss += 1;
        }
        if status3 == "miss" {
            count_miss += 1;
        }
        assert_eq!(count_miss, 1);
    }

    #[tokio::test]
    async fn test_cache_lock_uncacheable() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_lock_uncacheable.txt";

        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-no-store", "true") // tell origin to return CC: no-store
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), 200);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "no-cache");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;

        let task2 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "no-cache");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        let task3 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "no-cache");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        task1.await.unwrap();
        task2.await.unwrap();
        task3.await.unwrap();
    }

    #[tokio::test]
    async fn test_cache_lock_timeout() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_lock_timeout.txt";

        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-set-sleep", "3") // we have a 2 second cache lock timeout
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), 200);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;

        let task2 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-set-sleep", "0.1") // tell origin to return faster
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            // cache lock timeout, try to replace lock
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        // send the 3rd request after the 2 second cache lock timeout where the
        // first request still holds the lock (3s delay in origin)
        sleep(Duration::from_millis(2200)).await;
        let task3 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-set-sleep", "0.1")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            // this is now a hit because the second task cached from origin
            // successfully
            // and will fetch from origin successfully
            assert_eq!(headers["x-cache-status"], "hit");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        task1.await.unwrap();
        task2.await.unwrap();
        task3.await.unwrap();
    }

    #[tokio::test]
    async fn test_cache_lock_wait_timeout() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_lock_wait_timeout.txt";

        let mut handles = vec![];
        const N_REQUESTS: u64 = 8;
        for _ in 0..N_REQUESTS {
            handles.push(tokio::spawn(async move {
                // Each task will attempt to wait for the origin's 1s sleep upon acquiring the
                // cache lock, before the origin disconnects.
                let res = reqwest::Client::new()
                    .get(url)
                    .header("x-lock", "true")
                    .header("x-set-sleep", "1") // we have a 2 second cache lock timeout
                    .header("x-abort", "1")
                    .send()
                    .await
                    .unwrap();
                assert_eq!(res.status(), 502);
                let headers = res.headers();
                headers
                    .get("x-cache-lock-time-ms")
                    .and_then(|ms| ms.to_str().ok().and_then(|s| s.parse::<u64>().ok()))
            }));
        }
        let mut waited_count = 0;
        for handle in handles {
            let lock_time_ms = handle.await.unwrap();
            if let Some(lock_time_ms) = lock_time_ms {
                // should not have waited more than 2s for each response
                waited_count += 1;
                assert!(lock_time_ms <= 2200);
            }
        }
        assert!(waited_count > 0, "at least one reader waited");
        // This whole process /should/ have taken no longer than 4s, as each reader has an
        // independently enforced 2s timeout
    }

    #[tokio::test]
    async fn test_cache_serve_stale_network_error() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_serve_stale_network_error.txt";

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep we just reuse this endpoint
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep we just reuse this endpoint
            .header("x-abort", "true") // this will tell origin to kill the conn right away
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "stale");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_cache_serve_stale_network_error_mid_response() {
        init();
        let url =
            "http://127.0.0.1:6148/sleep/test_cache_serve_stale_network_error_mid_response.txt";

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep we just reuse this endpoint
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep we just reuse this endpoint
            .header("x-set-body-sleep", "0.1") // pause the body a bit before abort
            .header("x-abort-body", "true") // this will tell origin to kill the conn right away
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "expired");
        // the connection dies
        assert!(res.text().await.is_err());
    }

    #[tokio::test]
    async fn test_cache_serve_stale_on_500() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_cache_serve_stale_on_500.txt";

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep we just reuse this endpoint
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0") // no need to sleep we just reuse this endpoint
            .header("x-error-header", "true") // this will tell origin to return 500
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "stale");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_stale_while_revalidate_many_readers() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_stale_while_revalidate_many_readers.txt";

        // cache one
        let res = reqwest::Client::new().get(url).send().await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");
        // let it stale
        sleep(Duration::from_secs(1)).await;

        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "stale-updating");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;
        let task2 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "stale-updating");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });
        let task3 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "stale-updating");
            assert_eq!(res.text().await.unwrap(), "hello world");
        });

        task1.await.unwrap();
        task2.await.unwrap();
        task3.await.unwrap();
    }

    #[tokio::test]
    async fn test_stale_while_revalidate_single_request() {
        init();
        let url = "http://127.0.0.1:6148/sleep/test_stale_while_revalidate_single_request.txt";

        // cache one
        let res = reqwest::Client::new()
            .get(url)
            .header("x-set-sleep", "0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");
        // let it stale
        sleep(Duration::from_secs(1)).await;

        let res = reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .header("x-set-sleep", "0") // by default /sleep endpoint will sleep 1s
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "stale-updating");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // wait for the background request to finish
        sleep(Duration::from_millis(100)).await;

        let res = reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit"); // fresh
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_cache_streaming_partial_body() {
        init();
        let url = "http://127.0.0.1:6148/slow_body/test_cache_streaming_partial_body.txt";
        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello world!");
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;

        let task2 = tokio::spawn(async move {
            let start = Instant::now();
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "hit");
            let lock_time_ms: u32 = headers["x-cache-lock-time-ms"]
                .to_str()
                .unwrap()
                .parse()
                .unwrap();
            let body = res.text().await.unwrap();
            let total_ms = start.elapsed().as_millis() as u32;
            // lock should cover only the header, not the full body streaming.
            // if the body were also under lock, lock_time would approach total_ms.
            assert!(
                lock_time_ms < total_ms / 2,
                "lock time {lock_time_ms}ms should be well under total request time {total_ms}ms"
            );
            assert_eq!(body, "hello world!");
        });
        let task3 = tokio::spawn(async move {
            let start = Instant::now();
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "hit");
            let lock_time_ms: u32 = headers["x-cache-lock-time-ms"]
                .to_str()
                .unwrap()
                .parse()
                .unwrap();
            let body = res.text().await.unwrap();
            let total_ms = start.elapsed().as_millis() as u32;
            // lock should cover only the header, not the full body streaming.
            // if the body were also under lock, lock_time would approach total_ms.
            assert!(
                lock_time_ms < total_ms / 2,
                "lock time {lock_time_ms}ms should be well under total request time {total_ms}ms"
            );
            assert_eq!(body, "hello world!");
        });

        task1.await.unwrap();
        task2.await.unwrap();
        task3.await.unwrap();
    }

    #[tokio::test]
    async fn test_cache_streaming_multiple_writers() {
        // multiple streaming writers don't conflict
        init();
        let url = "http://127.0.0.1:6148/slow_body/test_cache_streaming_multiple_writers.txt";
        let task1 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-set-hello", "everyone")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello everyone!");
        });

        let task2 = tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                // don't allow using the other streaming write's result
                .header("x-force-expire", "1")
                .header("x-set-hello", "todo el mundo")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            assert_eq!(res.text().await.unwrap(), "hello todo el mundo!");
        });

        task1.await.unwrap();
        task2.await.unwrap();
    }

    #[tokio::test]
    async fn test_range_request() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_range_request/now";

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        let cache_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "he");

        // full body is cached
        let res = reqwest::get(url).await.unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_miss_epoch, cache_hit_epoch);

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "he");

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=1-0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::RANGE_NOT_SATISFIABLE);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "");

        let res = reqwest::Client::new()
            .head(url)
            .header("Range", "bytes=0-1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "");

        sleep(Duration::from_millis(1100)).await; // ttl is 1

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "expired");
        assert_eq!(res.text().await.unwrap(), "he");
    }

    #[tokio::test]
    async fn test_multipart_range_request() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_multipart_range_request/now";

        // 1st multipart range request - uncached

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-1, 6-8") // he wor
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers().clone();
        let content_type = headers.get("content-type").unwrap().to_str().unwrap();
        // Grab boundary to verify full response
        let boundary = content_type
            .split("boundary=")
            .nth(1)
            .expect("Expected to have a boundary");
        assert_eq!(
            content_type,
            format!("multipart/byteranges; boundary={boundary}")
        );
        assert_eq!(headers["x-cache-status"], "miss");

        let body = res.text().await.unwrap();

        let expected_body = format!(
            "\r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 0-1/11\r\n\
            \r\n\
            he\
            \r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 6-8/11\r\n\
            \r\n\
            wor\
            \r\n--{boundary}--\r\n\
        "
        );
        assert_eq!(body, expected_body);

        // 2nd request, cached

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=2-3, 8-10") // ll rld
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers().clone();
        let content_type = headers.get("content-type").unwrap().to_str().unwrap();
        let boundary = content_type
            .split("boundary=")
            .nth(1)
            .expect("Expected to have a boundary");
        assert_eq!(
            content_type,
            format!("multipart/byteranges; boundary={boundary}")
        );
        assert_eq!(headers["x-cache-status"], "hit");

        let body = res.text().await.unwrap();

        let expected_body = format!(
            "\r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 2-3/11\r\n\
            \r\n\
            ll\
            \r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 8-10/11\r\n\
            \r\n\
            rld\
            \r\n--{boundary}--\r\n\
        "
        );
        assert_eq!(body, expected_body);

        // 3rd request, cached

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=1-2, 3-4, 8-10") // el lo rld
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers().clone();
        let content_type = headers.get("content-type").unwrap().to_str().unwrap();
        let boundary = content_type
            .split("boundary=")
            .nth(1)
            .expect("Expected to have a boundary");
        assert_eq!(
            content_type,
            format!("multipart/byteranges; boundary={boundary}")
        );
        assert_eq!(headers["x-cache-status"], "hit");

        let body = res.text().await.unwrap();

        let expected_body = format!(
            "\r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 1-2/11\r\n\
            \r\n\
            el\
            \r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 3-4/11\r\n\
            \r\n\
            lo\
            \r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 8-10/11\r\n\
            \r\n\
            rld\
            \r\n--{boundary}--\r\n\
        "
        );
        assert_eq!(body, expected_body);

        // 4th request - cached and poorly formed request header

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=2-3, 8-10, 3-5")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers().clone();
        assert_eq!(headers["content-type"], "text/plain");
        assert_eq!(headers["x-cache-status"], "hit");

        let body = res.text().await.unwrap();
        assert_eq!(body, "hello world");

        // 5th request: Single range GET

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-2")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");

        let body = res.text().await.unwrap();
        assert_eq!(body, "hel");

        // 6th request invalid range

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=20-22, 30-40")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::RANGE_NOT_SATISFIABLE);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "");

        // 7th request: Single range HEAD

        let res = reqwest::Client::new()
            .head(url)
            .header("Range", "bytes=3-7")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");

        let body = res.text().await.unwrap();
        assert_eq!(body, "");
    }

    #[tokio::test]
    async fn test_single_then_mutltipart_range_request() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_single_then_multipart_range_request/now";

        // 1st request - single range request

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-4")
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");

        let body = res.text().await.unwrap();
        assert_eq!(body, "hello");

        // 2nd request - multipart range request

        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-3, 6-7") // hell wo
            .send()
            .await
            .unwrap();

        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers().clone();
        let content_type = headers.get("content-type").unwrap().to_str().unwrap();
        let boundary = content_type
            .split("boundary=")
            .nth(1)
            .expect("Expected to have a boundary");
        assert_eq!(
            content_type,
            format!("multipart/byteranges; boundary={boundary}")
        );
        assert_eq!(headers["x-cache-status"], "hit");

        let body = res.text().await.unwrap();

        let expected_body = format!(
            "\r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 0-3/11\r\n\
            \r\n\
            hell\
            \r\n--{boundary}\r\n\
            Content-Type: text/plain\r\n\
            Content-Range: bytes 6-7/11\r\n\
            \r\n\
            wo\
            \r\n--{boundary}--\r\n\
        "
        );
        assert_eq!(body, expected_body);

        // 3rd request - Multipart request with one valid range
        let res = reqwest::Client::new()
            .get(url)
            .header("Range", "bytes=0-4, 12-14, 16-18") // hello
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        let content_type = headers.get("content-type").unwrap().to_str().unwrap();
        assert!(!content_type.contains("multipart/byteranges; boundary="));
        assert_eq!(headers["x-cache-status"], "hit");

        assert_eq!(res.text().await.unwrap(), "hello");
    }

    #[tokio::test]
    async fn test_caching_when_downstream_bails() {
        init();
        let url = "http://127.0.0.1:6148/slow_body/test_caching_when_downstream_bails/";

        tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "miss");
            // exit without res.text().await so that we bail early
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;

        let res = reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        let lock_time_ms: u32 = headers["x-cache-lock-time-ms"]
            .to_str()
            .unwrap()
            .parse()
            .unwrap();
        // the entire body should need 2 extra seconds, here the test shows that
        // only the header is under cache lock and the body should be streamed
        assert!(lock_time_ms > 900 && lock_time_ms < 1000);
        assert_eq!(res.text().await.unwrap(), "hello world!");
    }

    #[tokio::test]
    async fn test_caching_when_downstream_bails_uncacheable() {
        init();
        let url = "http://127.0.0.1:6148/slow_body/test_caching_when_downstream_bails_uncacheable/";

        tokio::spawn(async move {
            let res = reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-no-store", "1")
                .send()
                .await
                .unwrap();
            assert_eq!(res.status(), StatusCode::OK);
            let headers = res.headers();
            assert_eq!(headers["x-cache-status"], "no-cache");
            // exit without res.text().await so that we bail early
        });
        // sleep just a little to make sure the req above gets the cache lock
        sleep(Duration::from_millis(50)).await;

        let res = reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        // entirely new request made to upstream, since the response was uncacheable
        assert_eq!(headers["x-cache-status"], "no-cache"); // due to cache lock give up
        assert_eq!(res.text().await.unwrap(), "hello world!");
    }

    #[tokio::test]
    async fn test_caching_when_downstream_bails_header() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_caching_when_downstream_bails_header/sleep";

        tokio::spawn(async move {
            // this should always time out
            reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-set-sleep", "2")
                .timeout(Duration::from_secs(1))
                .send()
                .await
                .unwrap_err()
        });
        // sleep after cache fill
        sleep(Duration::from_millis(2500)).await;

        // next request should be a cache hit
        let res = reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_caching_when_downstream_bails_header_uncacheable() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_caching_when_downstream_bails_header_uncacheable/sleep";

        tokio::spawn(async move {
            // this should always time out
            reqwest::Client::new()
                .get(url)
                .header("x-lock", "true")
                .header("x-set-sleep", "2")
                .header("x-no-store", "1")
                .timeout(Duration::from_secs(1))
                .send()
                .await
                .unwrap_err()
            // note that while the downstream error is ignored,
            // once the response is uncacheable we will still attempt to write
            // downstream and find a broken connection that terminates the request
        });
        // sleep after cache fill
        sleep(Duration::from_millis(2500)).await;

        // next request should be a cache miss, as the previous fill was uncacheable
        let res = reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    async fn send_vary_req_with_headers_with_dups(
        url: &str,
        vary_field: &str,
        headers: Vec<(&str, &str)>,
        dup_headers: Vec<(&str, &str)>,
    ) -> reqwest::Response {
        let req_headers = headers
            .iter()
            .map(|(name, value)| {
                (
                    HeaderName::from_str(name).unwrap(),
                    HeaderValue::from_str(value).unwrap(),
                )
            })
            .collect();
        let mut req_builder = reqwest::Client::new()
            .get(url)
            .headers(req_headers)
            .header("set-vary", vary_field);

        // Apply any duplicate headers
        for (key, value) in dup_headers {
            req_builder = req_builder.header(key, value);
        }

        req_builder.send().await.unwrap()
    }

    async fn send_vary_req_with_headers(
        url: &str,
        vary_field: &str,
        headers: Vec<(&str, &str)>,
    ) -> reqwest::Response {
        send_vary_req_with_headers_with_dups(url, vary_field, headers, vec![]).await
    }

    async fn send_vary_req(url: &str, vary_field: &str, value: &str) -> reqwest::Response {
        send_vary_req_with_headers(url, vary_field, vec![(vary_field, value)]).await
    }

    #[tokio::test]
    async fn test_vary_caching() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_vary_caching/vary";
        let vary_field = "Accept";

        let res = send_vary_req(url, vary_field, "image/png").await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_a_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_vary_req(url, vary_field, "image/png").await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_a_miss_epoch, cache_hit_epoch);

        let res = send_vary_req(url, vary_field, "image/jpeg").await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_b_miss_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_vary_req(url, vary_field, "image/jpeg").await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let cache_hit_epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        assert_eq!(cache_b_miss_epoch, cache_hit_epoch);
        assert!(cache_a_miss_epoch != cache_b_miss_epoch);
    }

    #[tokio::test]
    async fn test_vary_caching_ignored_vary_header() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_vary_caching_ignored_vary_header/vary";
        let vary_field = "Some-Ignored-Vary-Header";

        // Asset into cache (png)
        let res = send_vary_req(url, vary_field, "image/png").await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let epoch = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // HIT on png
        let res = send_vary_req(url, vary_field, "image/png").await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(
            epoch,
            headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap()
        );
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // Vary header ignored -> get png, not jpeg
        let res = send_vary_req(url, vary_field, "image/jpeg").await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(
            epoch,
            headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap()
        );
        assert_eq!(headers["x-cache-status"], "hit");
    }

    #[tokio::test]
    async fn test_vary_some_ignored() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_vary_some_ignored/vary";
        let vary_header = "Accept, SomeIgnoredVaryHeader";

        // Make a request where we vary on some headers, and provide values for those.
        let res = send_vary_req_with_headers(
            url,
            vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "image/webp"),
                ("Accept", "image/webp"),
            ],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        let epoch1 = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(res.text().await.unwrap(), "hello world");

        // Identical request should yield a HIT.
        let res = send_vary_req_with_headers(
            url,
            vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "image/webp"),
                ("Accept", "image/webp"),
            ],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(
            epoch1,
            headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap()
        );
        assert_eq!(res.text().await.unwrap(), "hello world");

        // Hit when changing a header we don't vary on.
        let res = send_vary_req_with_headers(
            url,
            vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "definitely-not-webp"),
                ("Accept", "image/webp"),
            ],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(
            epoch1,
            headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap()
        );
        assert_eq!(res.text().await.unwrap(), "hello world");

        // Get a secondary variant by changing Accept.
        let res = send_vary_req_with_headers(
            url,
            vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "definitely-not-webp"),
                ("Accept", "image/jpeg"),
            ],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        let epoch2 = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_ne!(epoch1, epoch2);
        assert_eq!(res.text().await.unwrap(), "hello world");

        // Cache hit on secondary variant.
        let res = send_vary_req_with_headers(
            url,
            vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "definitely-not-webp"),
                ("Accept", "image/jpeg"),
            ],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(
            epoch2,
            headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap()
        );

        // Cache hit on primary variant.
        let res = send_vary_req_with_headers(
            url,
            vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "definitely-not-webp"),
                ("Accept", "image/webp"),
            ],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(
            epoch1,
            headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap()
        );
    }

    #[tokio::test]
    async fn test_vary_dup_header_some_ignored() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_vary_dup_header_some_ignored/vary";
        let first_vary_header = "SomeIgnoredVaryHeader";
        let dup_vary_header = "Accept";

        // Make a request where we vary on some headers, and provide values for those.
        let res = send_vary_req_with_headers_with_dups(
            url,
            first_vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "image/webp"),
                ("Accept", "image/webp"),
            ],
            vec![("set-vary", dup_vary_header)],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        let epoch1 = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_eq!(res.text().await.unwrap(), "hello world");

        // Identical request should yield a HIT.
        let res = send_vary_req_with_headers_with_dups(
            url,
            first_vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "image/webp"),
                ("Accept", "image/webp"),
            ],
            vec![("set-vary", dup_vary_header)],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(
            epoch1,
            headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap()
        );
        assert_eq!(res.text().await.unwrap(), "hello world");

        // Get a secondary variant by changing Accept.
        let res = send_vary_req_with_headers_with_dups(
            url,
            first_vary_header,
            vec![
                ("SomeIgnoredVaryHeader", "image/webp"),
                ("Accept", "image/jpeg"),
            ],
            vec![("set-vary", dup_vary_header)],
        )
        .await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        let epoch2 = headers["x-epoch"].to_str().unwrap().parse::<f64>().unwrap();
        assert_ne!(epoch1, epoch2);
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_vary_purge() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_vary_purge/vary";
        let vary_field = "Accept";
        let opt1 = "image/png";
        let opt2 = "image/jpeg";

        send_vary_req(url, vary_field, opt1).await;
        let res = send_vary_req(url, vary_field, opt1).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");

        send_vary_req(url, vary_field, opt2).await;
        let res = send_vary_req(url, vary_field, opt2).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");

        //both variances are cached

        let res = reqwest::Client::builder()
            .build()
            .unwrap()
            .request(reqwest::Method::from_bytes(b"PURGE").unwrap(), url)
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        assert_eq!(res.text().await.unwrap(), "");

        //both should be miss

        let res = send_vary_req(url, vary_field, opt1).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");

        let res = send_vary_req(url, vary_field, opt2).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
    }

    async fn send_max_file_size_req(
        url: &str,
        max_file_size_bytes: usize,
        range: Option<(usize, usize)>,
    ) -> reqwest::Response {
        let mut req = reqwest::Client::new().get(url).header(
            "x-cache-max-file-size-bytes",
            max_file_size_bytes.to_string(),
        );
        if let Some(r) = range {
            req = req.header("Range", format!("bytes={}-{}", r.0, r.1));
        }
        req.send().await.unwrap()
    }

    #[tokio::test]
    async fn test_cache_max_file_size() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_cache_max_file_size_100/now";

        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["content-length"], "11");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let url = "http://127.0.0.1:6148/unique/test_cache_max_file_size_1/now";
        let res = send_max_file_size_req(url, 1, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_max_file_size_req(url, 1, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // became cacheable
        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_cache_max_file_size_chunked() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_cache_max_file_size_chunked_100/test3";

        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(headers["transfer-encoding"], "chunked");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let url = "http://127.0.0.1:6148/unique/test_cache_max_file_size_chunked_1/test3";
        let res = send_max_file_size_req(url, 1, None).await;
        // TODO: this can currently break with 500 when body arrives alongside header
        assert!(matches!(
            res.status(),
            StatusCode::INTERNAL_SERVER_ERROR | StatusCode::OK
        ));
        let headers = res.headers();
        assert!(headers
            .get("x-cache-status")
            .is_none_or(|s| s == "no-cache"));

        let res = send_max_file_size_req(url, 1, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // became cacheable
        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        // will get marked on the next request
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");

        let res = send_max_file_size_req(url, 100, None).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_cache_max_file_size_range() {
        init();
        let url = "http://127.0.0.1:6148/unique/test_cache_max_file_size_range_100/now";

        let res = send_max_file_size_req(url, 100, Some((1, 4))).await;
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        let epoch1 = headers["x-epoch"].clone();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "ello");

        let res = send_max_file_size_req(url, 100, Some((1, 4))).await;
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        let epoch2 = headers["x-epoch"].clone();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "ello");
        assert_eq!(epoch1, epoch2);

        // disable downstream ranging on max file size exceeded
        let url = "http://127.0.0.1:6148/unique/test_cache_max_file_size_range_1/now";
        let res = send_max_file_size_req(url, 1, Some((1, 4))).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let epoch1 = headers["x-epoch"].clone();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");

        // predicted uncacheable (note upstream endpoint doesn't support range)
        let res = send_max_file_size_req(url, 1, Some((1, 4))).await;
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        let epoch2 = headers["x-epoch"].clone();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");
        assert!(epoch1 != epoch2);
    }

    #[tokio::test]
    async fn test_cache_h2_premature_end() {
        init();
        let url = "http://127.0.0.1:6148/set_content_length/test_cache_h2_premature_end.txt";
        // try to fill cache
        reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .header("x-h2", "true")
            .header("x-set-content-length", "13") // 2 more than "hello world"
            .send()
            .await
            .unwrap();
        // h2 protocol error with content length mismatch

        // did not get saved into cache, next request will be cache miss
        let res = reqwest::Client::new()
            .get(url)
            .header("x-lock", "true")
            .header("x-h2", "true")
            .header("x-set-content-length", "11")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_cache_bypass_downstream_range() {
        init();
        let test_url =
            "http://127.0.0.1:6148/unique/test_cache_bypass_downstream_range/cache_control/";

        let res = reqwest::Client::new()
            .get(test_url)
            .header("Range", "bytes=6-10")
            // We start this request as cacheable, and then this disables it.
            // We would expect the range body filter to run since we have
            // started to cache, and then disabled.
            .header("set-cache-control", "private, max-age=0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::PARTIAL_CONTENT);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "world");

        // We're starting as uncacheable, so proxy origin to downstream
        // unchanged.
        let res = reqwest::Client::new()
            .get(test_url)
            // We pass this up to the upstream, but it ignores it.
            .header("Range", "bytes=0-4")
            .header("set-cache-control", "private, max-age=0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "no-cache");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[tokio::test]
    async fn test_downstream_head_miss_conn_close_h1() {
        init();

        let test_url =
            "http://127.0.0.1:6148/unique/test_cache_downstream_head_miss_conn_close/sleep/";

        let res = reqwest::Client::new()
            .head(test_url)
            .header("x-set-body-sleep", "1")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "");
        // closed connection does not impact next cache fill

        let res = reqwest::Client::new().get(test_url).send().await.unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }

    #[cfg(feature = "any_tls")]
    #[tokio::test]
    async fn test_downstream_head_miss_conn_close_h2() {
        init();

        let test_url =
            "https://127.0.0.1:6153/unique/test_cache_downstream_head_miss_conn_close/sleep/";

        let client = reqwest::Client::builder()
            .danger_accept_invalid_certs(true)
            .build()
            .unwrap();

        let res = client
            .head(test_url)
            .header("x-set-body-sleep", "0")
            .send()
            .await
            .unwrap();
        assert_eq!(res.status(), StatusCode::OK);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "miss");
        assert_eq!(res.text().await.unwrap(), "");
        // closed connection does not impact next cache fill

        let client = reqwest::Client::builder()
            .danger_accept_invalid_certs(true)
            .build()
            .unwrap();

        let res = client.get(test_url).send().await.unwrap();
        assert_eq!(res.status(), 200);
        let headers = res.headers();
        assert_eq!(headers["x-cache-status"], "hit");
        assert_eq!(res.text().await.unwrap(), "hello world");
    }
}


================================================
FILE: pingora-proxy/tests/utils/cert.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use once_cell::sync::Lazy;
#[cfg(feature = "s2n")]
use pingora_core::tls::load_pem_file;
#[cfg(feature = "rustls")]
use pingora_core::tls::{load_pem_file_ca, load_pem_file_private_key};
#[cfg(feature = "openssl_derived")]
use pingora_core::tls::{
    pkey::{PKey, Private},
    x509::X509,
};
use std::fs;

#[cfg(feature = "openssl_derived")]
mod key_types {
    use super::*;
    pub type PrivateKeyType = PKey<Private>;
    pub type CertType = X509;
}

#[cfg(feature = "rustls")]
mod key_types {
    use super::*;
    pub type PrivateKeyType = Vec<u8>;
    pub type CertType = Vec<u8>;
}

#[cfg(feature = "s2n")]
mod key_types {
    use super::*;
    pub type PrivateKeyType = Vec<u8>;
    pub type CertType = Vec<u8>;
}

use key_types::*;

pub static INTERMEDIATE_CERT: Lazy<CertType> = Lazy::new(|| load_cert("keys/intermediate.crt"));
pub static LEAF_CERT: Lazy<CertType> = Lazy::new(|| load_cert("keys/leaf.crt"));
pub static LEAF2_CERT: Lazy<CertType> = Lazy::new(|| load_cert("keys/leaf2.crt"));
pub static LEAF_KEY: Lazy<PrivateKeyType> = Lazy::new(|| load_key("keys/leaf.key"));
pub static LEAF2_KEY: Lazy<PrivateKeyType> = Lazy::new(|| load_key("keys/leaf2.key"));
pub static CURVE_521_TEST_KEY: Lazy<PrivateKeyType> =
    Lazy::new(|| load_key("keys/curve_test.521.key.pem"));
pub static CURVE_521_TEST_CERT: Lazy<CertType> = Lazy::new(|| load_cert("keys/curve_test.521.crt"));
pub static CURVE_384_TEST_KEY: Lazy<PrivateKeyType> =
    Lazy::new(|| load_key("keys/curve_test.384.key.pem"));
pub static CURVE_384_TEST_CERT: Lazy<CertType> = Lazy::new(|| load_cert("keys/curve_test.384.crt"));

#[cfg(feature = "openssl_derived")]
fn load_cert(path: &str) -> X509 {
    let path = format!("{}/{path}", super::conf_dir());
    let cert_bytes = fs::read(path).unwrap();
    X509::from_pem(&cert_bytes).unwrap()
}
#[cfg(feature = "openssl_derived")]
fn load_key(path: &str) -> PKey<Private> {
    let path = format!("{}/{path}", super::conf_dir());
    let key_bytes = fs::read(path).unwrap();
    PKey::private_key_from_pem(&key_bytes).unwrap()
}

#[cfg(feature = "rustls")]
fn load_cert(path: &str) -> Vec<u8> {
    let path = format!("{}/{path}", super::conf_dir());
    load_pem_file_ca(&path).unwrap()
}

#[cfg(feature = "rustls")]
fn load_key(path: &str) -> Vec<u8> {
    let path = format!("{}/{path}", super::conf_dir());
    load_pem_file_private_key(&path).unwrap()
}

#[cfg(feature = "s2n")]
fn load_cert(path: &str) -> Vec<u8> {
    let path = format!("{}/{path}", super::conf_dir());
    load_pem_file(&path).unwrap()
}

#[cfg(feature = "s2n")]
fn load_key(path: &str) -> Vec<u8> {
    let path = format!("{}/{path}", super::conf_dir());
    load_pem_file(&path).unwrap()
}


================================================
FILE: pingora-proxy/tests/utils/conf/keys/README.md
================================================
Some test certificates. The CA is specified in your package directory (grep for ca_file).

Some handy commands:
```
# Describe a pkey
openssl [ec|rsa|...] -in key.pem -noout -text
# Describe a cert
openssl x509 -in some_cert.crt -noout -text

# Generate self-signed cert
openssl ecparam -genkey -name secp256r1 -noout -out test_key.pem
openssl req -new -x509 -key test_key.pem -out test.crt -days 3650 -sha256 -subj '/CN=openrusty.org'

# Generate a cert signed by another
openssl ecparam -genkey -name secp256r1 -noout -out test_key.pem
openssl req -new -key test_key.pem -out test.csr
openssl x509 -req -in test.csr -CA server.crt -CAkey key.pem -CAcreateserial -CAserial test.srl -out test.crt -days 3650 -sha256

# Generate leaf cert
openssl x509 -req -in leaf.csr -CA intermediate.crt -CAkey intermediate.key -out leaf.crt -days 3650 -sha256 -extfile v3.ext

```

```
openssl version
# OpenSSL 3.1.1
echo '[v3_req]' > openssl.cnf
openssl req -config openssl.cnf -new -x509 -key key.pem -out server_rustls.crt -days 3650 -sha256 \
    -subj '/C=US/ST=CA/L=San Francisco/O=Cloudflare, Inc/CN=openrusty.org' \
    -addext "subjectAltName=DNS:*.openrusty.org,DNS:openrusty.org,DNS:cat.com,DNS:dog.com"
```


# Specific Examples

## Updating `intermediate.crt` certificate

```
# Generate the key file
openssl genrsa -out intermediate.key 2048

# Generate the signing request with the subject details filled in
openssl req -new -key intermediate.key -out intermediate.csr -subj '/C=US/ST=CA/O=Intermediate CA/CN=int.pingora.org'

# Evaluate the signing request with the root certificate
openssl x509 -req -in intermediate.csr -CA root.crt -CAkey root.key -CAcreateserial -CAserial intermediate.srl -extfile intermediate.cnf -extensions v3_intermediate_ca -out intermediate.crt -days 3650 -sha256 
```

## Updating `leaf.crt` certificate using the intermediate cert

```
# Generate the key file
openssl genrsa -out leaf.key 2048

# Generate the signing request with the subject details filled in
openssl req -new -key leaf.key -out leaf.csr -subj '/C=US/ST=CA/O=Internet Widgits Pty Ltd/CN=pingora.org'

# Evaluate the signing request with the root certificate
openssl x509 -req -in leaf.csr -CA intermediate.crt -CAkey intermediate.key -CAcreateserial -CAserial leaf.srl -extfile leaf.cnf -extensions v3 -out leaf.crt -days 3650 -sha256 
```


================================================
FILE: pingora-proxy/tests/utils/conf/keys/ca1.crt
================================================
-----BEGIN CERTIFICATE-----
MIIFbTCCA1UCFDsRVhSk+Asz9Q9BwsvZucCbYA5/MA0GCSqGSIb3DQEBCwUAMHMx
CzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYDVQQHDA1TYW4g
RnJhbmNpc2NvMR4wHAYDVQQKDBVDbG91ZGZsYXJlIFRlc3QgU3VpdGUxFzAVBgNV
BAMMDnNlbGZzaWduZWQuY29tMB4XDTIwMDkxODIwMzk1MloXDTMwMDkxNjIwMzk1
MlowczELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcM
DVNhbiBGcmFuY2lzY28xHjAcBgNVBAoMFUNsb3VkZmxhcmUgVGVzdCBTdWl0ZTEX
MBUGA1UEAwwOc2VsZnNpZ25lZC5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAw
ggIKAoICAQCuFbjnE8gTFMrcCXmiP4t1wrK0uW5JSvWpxZAfTHroka/o8wBcKa1c
7dXOGSEzKkTdsmrAkvi2KXMEAd08iwnY52xQ3vpaQDCiBhJhLUGaG2nJ5iH6A3CX
VfsoHccFTp3N4/iiCjxyxnUoQZW1fuun5A9cow6F8xNa7EPtPMJsK7nUYDW2PLj4
881aphUM483gS/Ph5IpaZs6bRP0HyscdSC8hoIZxkOfIgp8a9BvgnaK8cPhoNGFl
HNu4hU+0cxjke/iz9iKRHtdcyuKnRMv8kt+acTpdgWl5E4nmvwXFloPeUuUAEgcc
qcp9Uai2dp9XKfxAGW2wEQPpZseDH7mZ7+NwqxJ2z4R55fdIn8jmALJdz+npvpRr
QHHc6k9jv0iYv9XwZOqT1crlzwcCo3x8A7oD+sJrat5oY1zBXjNzLpb9DKyVQ1em
Ho/7VrLFtK+rJzI/b7D0r6GKk/h3SeqxmgN22fFPcbEM2eUIibUvmCB4OLooWkBs
eSeDr5wMZ7u9ExljGLywKHnOQQ7dlVUWeN5cncv9yU05fWE/whPEOri1ksyNdYr6
kjIY1NYKmXfRaKaR9/JCVkhZj0H8VI6QpkqVHKgI5UMeE5dHMYbxJv0lmG+w6XN1
Zew7DZRTidlBa6COxgCeQydxRTORCCPYQVYAGY5XiYtmWLGmsQjC1QIDAQABMA0G
CSqGSIb3DQEBCwUAA4ICAQAgGv+gvw5X9ftkGu/FEFK15dLHlFZ25tKHJw3LhJEf
xlDOCFI/zR+h2PFdVzks14LLrf4sSkRfJVkk2Qe5uRhHLcgnPIkCkJpGlpFMx2+V
O6azhJlnLEYeVXuzNiQHC+9LJH8i3NK37O8Z1z2EGsAz9kR09OBEvgDjSXFxCN0J
KLAMe4wfAhjUUt9/0bm9u7FYWyj0D5dUVeAul9X3Vo1HfffNovq2cuUlL1AG5Ku+
nPkxGckBo/Lc7jZQRcoZ2+mtvsfyMH5l9OW6JRrnC/Rf5P9bEjUcAskMh5WRdHSL
j98oCkosxg2ndTXke091lToqr7sZ1kiGA+Bj4cPlVXckQn3WU7GiUSSRqotZtn8g
EMT2iqHH3/iJOgtDe8XPWdBYNDeDFRVNpOtgCuYLXdz/Vli0Cecm3escbW/+GZ9P
vgZoNUej8/WTWHNy732N1cHvSbT3kLN6uONP4wNelh+UnfmiG10O54x7iaM3grt9
YvQ1I1G60NCj1tF9KvrCYCK/wnXnTWhlNZ4y+XbILFqE+k8zqiNzGZV9a8FAzht2
APsm2JzzZz6Ph6Zw8fVOS/LX7WgF/kNe5nIzVLqyFXtFxgomXaoxbADUTe16TVb3
6sV8p7nlq2r7Dr0+uROm7ZEg1F23SiieDoRvw5fUbRhZCU93fv7Nt7hWlKP+UqJj
Zg==
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/ca1.key.pem
================================================
-----BEGIN RSA PRIVATE KEY-----
MIIJKAIBAAKCAgEArhW45xPIExTK3Al5oj+LdcKytLluSUr1qcWQH0x66JGv6PMA
XCmtXO3VzhkhMypE3bJqwJL4tilzBAHdPIsJ2OdsUN76WkAwogYSYS1BmhtpyeYh
+gNwl1X7KB3HBU6dzeP4ogo8csZ1KEGVtX7rp+QPXKMOhfMTWuxD7TzCbCu51GA1
tjy4+PPNWqYVDOPN4Evz4eSKWmbOm0T9B8rHHUgvIaCGcZDnyIKfGvQb4J2ivHD4
aDRhZRzbuIVPtHMY5Hv4s/YikR7XXMrip0TL/JLfmnE6XYFpeROJ5r8FxZaD3lLl
ABIHHKnKfVGotnafVyn8QBltsBED6WbHgx+5me/jcKsSds+EeeX3SJ/I5gCyXc/p
6b6Ua0Bx3OpPY79ImL/V8GTqk9XK5c8HAqN8fAO6A/rCa2reaGNcwV4zcy6W/Qys
lUNXph6P+1ayxbSvqycyP2+w9K+hipP4d0nqsZoDdtnxT3GxDNnlCIm1L5ggeDi6
KFpAbHkng6+cDGe7vRMZYxi8sCh5zkEO3ZVVFnjeXJ3L/clNOX1hP8ITxDq4tZLM
jXWK+pIyGNTWCpl30WimkffyQlZIWY9B/FSOkKZKlRyoCOVDHhOXRzGG8Sb9JZhv
sOlzdWXsOw2UU4nZQWugjsYAnkMncUUzkQgj2EFWABmOV4mLZlixprEIwtUCAwEA
AQKCAgBP/nVX4dQnSH+rOsNk1fRcqZn6x9aw4TwfxkPizf8QjZma3scEkrYyJKwB
p7SE0WCRyyGY2jBlbIiIh97EqlNdE4LHap76B9MRMN8TPnuNuBkViKWGQDxlnkHp
/jzs6GJFMQOYWkHKr/04AWMs4mShYn/YnqjWzorPVhAknK3ujO04dPlZg2+wHj/3
7qdvo+J/tgccfytAPUulN79Z7Ekw4HGf7ya4WtDXZ4Z7GT8SKP2VwAe1wpQapXcl
xESK8/S1UW5IK8tYiiaGYkhieo+NwWP0kSEzxHrWAy90E8UwNWjlKYxHSwFvn2oH
yhVPuxSfNhDO16B6rmbwwqTdUR+0pepF9IcgWuGO/AAMPlo6tKKqo7oW8xUqX0EW
vSCdISLlOITe2GBFv0q1xcUG9xZM5/Hde4NPU6OpghFcM/Okl3MoGqvqH4Fcd2Lm
HsjHxE6/8pDvxy8wGMeHEYTcDnKdTGPQgyEHHTZBsoHOzrM7CXGgpGIj9DPxrJO+
VZFHqoILRbhiU3LTnyb5J8X8zyPv064LOoZOu2JoY99E2j1PtI4ym1fAzhd5ScU7
X2CJTXAA57e0ezZCuPh/isgHmhx3bFHUvluWPKyspchLy/Pk28382jgnM+/vdbZh
wObGpeLpIEylxMmMROxZSDiDFhwG/rrp08vmhJRjgCb6XRAiZQKCAQEA1dnTbqve
aMioUOt70U6kuokJGpibmp5CXD+yGev4QZiVuoZvURfqqwRoMyJ2jaALAYRJpRMc
tbBvi3iq+uhB4FFiSCegK+F3qYPpgPvC2kHDrqgm4Fdjhz0/CfkkihzyVae5BHU9
nm9xS39vmHKtPdM4Yt8n/vGXZy06pKRo2gxA4K5UswtJ3GGgKY+/dgRgXGS7eIaw
2b1uLvIZ8p2XGzMbjAtaTEykAQXMX7mPanpizT8LguvxCAFo2QyzCMJyuUii8pQS
H/ewKGVd3zZVN3KgWnGWoYpnRaY/eG6O60APV625yRgI0k4CZucWK8wuNU4TGpy7
YCnJSX3q/nIh9wKCAQEA0GVwvHjqWTOCpeTw5+QJq/0dvTVmaJzUe+EfBYpgaqd3
K+Lcj3TuNr+/v8sJ6ZhvflBxLI9Yk9kmfHMVsCrTXPr2mkmZMeucbM6Lot9tcYAN
FX+LKFIz9uDMXhMZfnycEurEdwlo1A1c4dpYEaOJ+OCmzglg7Bfxq7ol32MlVg8e
06VyjfFVR2fNzlRUFX/DZrI8mjgsVone/eJNGLYPUhXMZ905vfQFefP9DijTtecZ
AcPkhMMCXaldtuZ9WE9SRnV0HRpggDFdA+7AJnqp9umc3S1yv1YQvSFomAH+Aszs
LKuwS4VPwZWNiMHqRlQrZ6lKa+rMWSowHiJCgIpOkwKCAQEAyiSeLIX/tXK/T8ZY
gxBgvAae+Wn55Fzmg4aeFsysHW1bUzaScMg3xbJjwLo58EOxQ5zFdGmtgL0no2HL
1WLIKn8jdOsoB3KYBz+u8IKKvH7ftvAx12wjo4msVgQQmxEjrP3e8SzVszbKlEAA
v8zen4tSSHuCtgWuRRRG06yphDuC9B815wyro8sQd1ju9WLLp2p8n0BKWXgrd+rX
xjNay5Yy2t08XNUxTdoqRu4Dd/X6AOMwQXA/pX6XmlvbvFL52NSlWsHGpDsgY/71
jfIw+Tm8A+JNLaPDXN36Lx/qrssd9ZY9AK5cYFbnBFg55+qYX0DO5B/1KsA1Cegh
wqUmHwKCAQBw/r/NAccXzM1HREa3hbcU0W7hm+XGTVsNPHiEmY5D5j/AxQaQpndP
qlK/HMloJqY1mEp1PdhqejDbA8+7sMzgOpeh+swc/ELZ4HhoPLtr8mGlyX1bxI62
ixdk3vhQ1CIQQ8l5PdngOMqnD6v3DHSQRMdNKlqqSSVZ1toYMPsamaI+YhQmELgL
uqYl/SWGbrs1oOkpOdIYrjMB+EWTY4wVFwq5OoPHkluxz3Djz5FTrVWq1lu+/Ln4
rQ/KT1mhm4jh+WeXLCks+RcVPcxkUNh9sBfE+ZKhWnpDAq1i1pmzTQe2BPXXTRZ8
wal3gKWVsqfCUlGvCCX7JtvmSu9CITwPAoIBAEQO6PQh3nD/tJSFZxgtPVp7r3Px
+QEnE68Y0B0veq9g5SBovg4KADTcHbIbRymOBw+9skB65pxdoV3EFGmEXpMm5+5b
HC/DTXf2hEuKb49VO52NbbthiZg+xsnitEv4ZBfSVBRw+nL3Dx5c30M9wG/3OdGX
OWPYFoIJZDlyy3ynZtiGrjHgNqi/coHdsYVLfMkc+/hidApzhoApDkFGusVB6GHB
fTSeyuGfh39120LVnhFjDr+SpfyIXNJIiCwizLJtc1WliTtQzd/Fh1M62qO6ye4/
3M24xoaVCDgzNrSibELkiLTmqEA4cZwtN5BqhfnQa+Prujd5ElmABZSqDz8=
-----END RSA PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/ca2.crt
================================================
-----BEGIN CERTIFICATE-----
MIIFbTCCA1UCFDsRVhSk+Asz9Q9BwsvZucCbYA5/MA0GCSqGSIb3DQEBCwUAMHMx
CzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYDVQQHDA1TYW4g
RnJhbmNpc2NvMR4wHAYDVQQKDBVDbG91ZGZsYXJlIFRlc3QgU3VpdGUxFzAVBgNV
BAMMDnNlbGZzaWduZWQuY29tMB4XDTIwMDkxODIwMzk1MloXDTMwMDkxNjIwMzk1
MlowczELMAkGA1UEBhMCVVMxEzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcM
DVNhbiBGcmFuY2lzY28xHjAcBgNVBAoMFUNsb3VkZmxhcmUgVGVzdCBTdWl0ZTEX
MBUGA1UEAwwOc2VsZnNpZ25lZC5jb20wggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAw
ggIKAoICAQCuFbjnE8gTFMrcCXmiP4t1wrK0uW5JSvWpxZAfTHroka/o8wBcKa1c
7dXOGSEzKkTdsmrAkvi2KXMEAd08iwnY52xQ3vpaQDCiBhJhLUGaG2nJ5iH6A3CX
VfsoHccFTp3N4/iiCjxyxnUoQZW1fuun5A9cow6F8xNa7EPtPMJsK7nUYDW2PLj4
881aphUM483gS/Ph5IpaZs6bRP0HyscdSC8hoIZxkOfIgp8a9BvgnaK8cPhoNGFl
HNu4hU+0cxjke/iz9iKRHtdcyuKnRMv8kt+acTpdgWl5E4nmvwXFloPeUuUAEgcc
qcp9Uai2dp9XKfxAGW2wEQPpZseDH7mZ7+NwqxJ2z4R55fdIn8jmALJdz+npvpRr
QHHc6k9jv0iYv9XwZOqT1crlzwcCo3x8A7oD+sJrat5oY1zBXjNzLpb9DKyVQ1em
Ho/7VrLFtK+rJzI/b7D0r6GKk/h3SeqxmgN22fFPcbEM2eUIibUvmCB4OLooWkBs
eSeDr5wMZ7u9ExljGLywKHnOQQ7dlVUWeN5cncv9yU05fWE/whPEOri1ksyNdYr6
kjIY1NYKmXfRaKaR9/JCVkhZj0H8VI6QpkqVHKgI5UMeE5dHMYbxJv0lmG+w6XN1
Zew7DZRTidlBa6COxgCeQydxRTORCCPYQVYAGY5XiYtmWLGmsQjC1QIDAQABMA0G
CSqGSIb3DQEBCwUAA4ICAQAgGv+gvw5X9ftkGu/FEFK15dLHlFZ25tKHJw3LhJEf
xlDOCFI/zR+h2PFdVzks14LLrf4sSkRfJVkk2Qe5uRhHLcgnPIkCkJpGlpFMx2+V
O6azhJlnLEYeVXuzNiQHC+9LJH8i3NK37O8Z1z2EGsAz9kR09OBEvgDjSXFxCN0J
KLAMe4wfAhjUUt9/0bm9u7FYWyj0D5dUVeAul9X3Vo1HfffNovq2cuUlL1AG5Ku+
nPkxGckBo/Lc7jZQRcoZ2+mtvsfyMH5l9OW6JRrnC/Rf5P9bEjUcAskMh5WRdHSL
j98oCkosxg2ndTXke091lToqr7sZ1kiGA+Bj4cPlVXckQn3WU7GiUSSRqotZtn8g
EMT2iqHH3/iJOgtDe8XPWdBYNDeDFRVNpOtgCuYLXdz/Vli0Cecm3escbW/+GZ9P
vgZoNUej8/WTWHNy732N1cHvSbT3kLN6uONP4wNelh+UnfmiG10O54x7iaM3grt9
YvQ1I1G60NCj1tF9KvrCYCK/wnXnTWhlNZ4y+XbILFqE+k8zqiNzGZV9a8FAzht2
APsm2JzzZz6Ph6Zw8fVOS/LX7WgF/kNe5nIzVLqyFXtFxgomXaoxbADUTe16TVb3
6sV8p7nlq2r7Dr0+uROm7ZEg1F23SiieDoRvw5fUbRhZCU93fv7Nt7hWlKP+UqJj
Zg==
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/ca_chain.cert
================================================
-----BEGIN CERTIFICATE-----
MIIEjjCCAnagAwIBAgIUHIB/tqjZJaKIgeWwvXRt03C0yIMwDQYJKoZIhvcNAQEL
BQAwXzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJh
bmNpc2NvMRAwDgYDVQQKDAdSb290IENBMRkwFwYDVQQDDBByb290LnBpbmdvcmEu
b3JnMB4XDTIyMTExMDE5MzI0M1oXDTI1MDgwNjE5MzI0M1owTjELMAkGA1UEBhMC
VVMxCzAJBgNVBAgMAkNBMRgwFgYDVQQKDA9JbnRlcm1lZGlhdGUgQ0ExGDAWBgNV
BAMMD2ludC5waW5nb3JhLm9yZzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
ggEBAL4klMT1Bc4vYWN7zF+x7x34s3L51Sve3AVydGGtzej2hC3m4CictVfKfkC6
jMNRo3mpUsnAJSbyRh91fec8nnOT8MEYnmm05Lbf5DG4RULrKSg52zge4SFTLO2n
2eCa4SYwRpj+MQmFrCQ++s9gJ/5weN95z23XAS1EL2GK50Z/fKQfRCo+aZTRB6dU
KK2cUwuDAHTkVSePVAX8KGcZu2Qm/jTBlcDIfn7OmTu2g/n5YSRJg3MWKeJlAbVo
VNxmaRYQOs2X7y4WwcSAfEncyVXRzqFxEfSDnq2A2+pp/sKoCjTgE6n94SzyqyFm
yJ8FmvV79qCDHSaeIhR5qQEIlO8CAwEAAaNTMFEwHQYDVR0OBBYEFP5ivTJr/S6Z
VpOI4+JykGPID8s3MB8GA1UdIwQYMBaAFJ5hR0odQYOtYsY3P18WIC2byI1oMA8G
A1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIBAM337XP2Hm7LE3KLW+nn
khyj82ahj2k2+H/OdwGqzfqHCObTP+ydNJhOQVD+r255qQC9eAvd6ivF/h1tJOvv
Ed8vQMfLCO9VDFy6KCmlZQV6djRU1QXJIR/jf7TNqrFOcuKPGv5Vs6JwDdaHc0ae
ug7CGppnu5cxf/04sa7pWOdCFbhDRtfooo9fgGN2jcTFqfGyzocBwx7dgqEmZkae
yJAH0x4ldpKM9aO44h0Uy36c5RaWmdyFIh88QW62NoHamfwZoaVyycn82wcP4fFG
PRHm/AaDkYFGiQy22y7DD+MeZNUgCcAJpDYxfe87Cm4dw9NweMF6Jpo/8Ib1oLPq
E3miiFjWQwpMhxSQxpjqR92FPs9+/ktvYqbbMlyu/tju0rK17DXUi1zSIHoydPt0
ymwWMxg7Jxpmg0x+eyWr5CP/ULM+F2Tk9W7x0B5DnpDJeCk+1ydUhII9AnTOCUWs
0VRlqTgFKahkHfiLBjPaLCgA0D3dz06EfEq5tmC8t0MDAqw9M4bDdow29K0aN6K8
Gax7S5EK9aK09+HJ+7T5uxkUC+iIzfk53RhAfQiXdyKPpkbndRP67OiaAwk+hIXm
U1d1GsC854KYQs2GtHHvBcTGEADfU36TF/w2oJYQIrBjd23ZCf9jFK/WQ5GBFitT
ljoURxQQQy3LGjcH8W18JdRE
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIFnzCCA4egAwIBAgIUE5kg5Z26V4swShJoSwfNVsJkHbYwDQYJKoZIhvcNAQEL
BQAwXzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJh
bmNpc2NvMRAwDgYDVQQKDAdSb290IENBMRkwFwYDVQQDDBByb290LnBpbmdvcmEu
b3JnMB4XDTIyMTExMDE5MjY1MFoXDTQyMTExMDE5MjY1MFowXzELMAkGA1UEBhMC
VVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNpc2NvMRAwDgYDVQQK
DAdSb290IENBMRkwFwYDVQQDDBByb290LnBpbmdvcmEub3JnMIICIjANBgkqhkiG
9w0BAQEFAAOCAg8AMIICCgKCAgEA4s1XxwZruaRwuDX1IkM2oxdSdjg7FeUp8lsN
Uix4NdXz8IoQWRzCfFuRBKFHptahutSO6Bbewm9XmU2hHG7aoCqaZqEVQ/3KRLZ4
mzaNBCzDNgPTmDkz/DZKzOVuyVvbmTOsLn53yxKnFP9MEDIEemqGiM80MmFfCm/o
0vLkjwkRpreMsWPUhrq3igTWRctUYMJAeDsEaaXB1k5ovWICrEylMzslgSNfoBed
NmBpurz+yQddKNMTb/SLYxa7B1uZKDRSIXwwOZPdBDyUdlStUPodNG/OzprN+bRC
oFRB9EFG1m5oPJXQIalePj0dwhXl/bkV4uRxCSZmBZK3fbtLMF+Wkg2voTrn51Yv
lKkzUQoEX6WWtUameZZbUB8TbW2lmANuvGBmvBbj3+4ztmtJPXfJBkckCeUC6bwC
4CKrgB587ElY357Vqv/HmRRC9kxdzpOS9s5CtcqJ3Dg1TmLajyRQkf8wMqk0fhh7
V+VrPXB030MGABXh5+B2HOsF307vF030v7z+Xp5VRLGBqmDwK0Reo2h8cg9PkMDS
5Qc2zOJVslkJ+QYdkea1ajVpCsFbaC1JPmRWihTllboUqsk9oSS3jcIZ8vW3QKMg
ZbKtVbtVHr3mNGWuVs96iDN5Us3SJ6KGS8sanrAYAAB/NKd1Wl3I0aVtcb6eOONd
edf9+b0CAwEAAaNTMFEwHQYDVR0OBBYEFJ5hR0odQYOtYsY3P18WIC2byI1oMB8G
A1UdIwQYMBaAFJ5hR0odQYOtYsY3P18WIC2byI1oMA8GA1UdEwEB/wQFMAMBAf8w
DQYJKoZIhvcNAQELBQADggIBAIrpAsrPre3R4RY0JmnvomgH+tCSMHb6dW52YrEl
JkEG4cVc5MKs5QfPp8l2d1DngqiOUnOf0MWwWNDidHQZKrWs59j67L8qKN91VQKe
cSNEX3iMFvE59Hr0Ner6Kr09wZLHVVNGcy0FdhWpJdDUGDoQjfL7n7usJyCUqWSq
/pa1I9Is3ZfeQ5f7Ztrdz35vVPj+0BlHXbZM5AZi8Dwf3vXFBlPty3fITpE65cty
cYnbpGto+wDoZj9fkKImjK21QsJdmHwaWRgmXX3WbdFBAbScTjDOc5Mls2VY8rSh
+xLI1KMB0FHSJqrGoFN3uE+G1vJX/hgn98KZKob23yJr2TWr9LHI56sMfN5xdd5A
iOHxYODSrIAi1k+bSlDz6WfEtufoqwBwHiog4nFOXrlHpGO6eUB1QjaQJZwKn2zE
3BjqJOoqbuBMg5XZRjihHcVVuZdU39/zQDwqliNpx3km4FzOiEoBABGzLP+Qt0Ch
cJFS1Yc8ffv616yP4A9qkyogk9YBBvNbDLB7WV8h8p1s4JP3f5aDUlxtAD+E+3aJ
8mrb3P7/0A2QyxlgX4qQOdj++b7GzXDxxLgOimJ4pLo0fdY8KWMeHvZPiMryHkMx
3GSZCHeleSVBCPB2pPCzUqkkKADbjBX3SYJsAMF9uXQAR4U7wojjvAmbt6vJEh6j
TEUG
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/ca_chain.srl
================================================
764CA822243398735D12CB8F1295AEDF38869BA7


================================================
FILE: pingora-proxy/tests/utils/conf/keys/cert_chain.crt
================================================
-----BEGIN CERTIFICATE-----
MIICtzCCAl2gAwIBAgIUC8kzFXZNRqjR158InTieHg1VrWowCgYIKoZIzj0EAwIw
gY4xCzAJBgNVBAYTAlVTMRMwEQYDVQQIEwpDYWxpZm9ybmlhMRYwFAYDVQQHEw1T
YW4gRnJhbmNpc2NvMRgwFgYDVQQKEw9IYXBweUNlcnQsIEluYy4xHzAdBgNVBAsT
FkhhcHB5Q2VydCBJbnRlcm1lZGlhdGUxFzAVBgNVBAMTDihkZXYgdXNlIG9ubHkp
MCAXDTE5MTIwOTE5NDgwMFoYDzIxMTkxMTE1MTk0ODAwWjCBgTELMAkGA1UEBhMC
VVMxEzARBgNVBAgTCkNhbGlmb3JuaWExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28x
GTAXBgNVBAoTEERFUiBpcyBGdW4sIEluYy4xETAPBgNVBAsTCEVuY29kaW5nMRcw
FQYDVQQDEw4oZGV2IHVzZSBvbmx5KTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
BJSBMLYEVPgjmd2vWgMpN9LupZa56T7Ds1+wAlyMphLDN56PWuphsrNsEwiIIeNv
MtRTPRuoiBkfvMiWON6nkGWjgaEwgZ4wDgYDVR0PAQH/BAQDAgWgMBMGA1UdJQQM
MAoGCCsGAQUFBwMBMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFOYNuOCrYKnTFIEV
ck5845y/yZHkMB8GA1UdIwQYMBaAFFZRXwepqUwm9Kh+repV7LkBDnEHMCkGA1Ud
EQQiMCCCCWRlcmlzLmZ1boITd2VsbGtub3duLmRlcmlzLmZ1bjAKBggqhkjOPQQD
AgNIADBFAiEA9XAQ1Xi4Lav8LKzXZMSOHHj21ycqf3grnUfKJ6iwRvkCIDevfipo
qIuR/Dnt1bBoXxFKv0w/LpH/89jIohUQwVSc
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIDwzCCAqugAwIBAgIJAN0mCzwZkgZKMA0GCSqGSIb3DQEBCwUAMHgxCzAJBgNV
BAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYDVQQHDA1TYW4gRnJhbmNp
c2NvMRgwFgYDVQQKDA9DbG91ZEZsYXJlLCBJbmMxDDAKBgNVBAsMA1ImRDEUMBIG
A1UEAwwLZXhhbXBsZS5jb20wHhcNMTYwNjMwMTY1NTM5WhcNMzYwNjI1MTY1NTM5
WjB4MQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwN
U2FuIEZyYW5jaXNjbzEYMBYGA1UECgwPQ2xvdWRGbGFyZSwgSW5jMQwwCgYDVQQL
DANSJkQxFDASBgNVBAMMC2V4YW1wbGUuY29tMIIBIjANBgkqhkiG9w0BAQEFAAOC
AQ8AMIIBCgKCAQEA7y+v+9Eh2LjFoZbUetrJc+IVPb92PBNNY5AM+Nxukzj/9hth
tu7UPFnO+USrh+nFtR/rFfC6UwUqCtPaQ4EkSVJslR8f34GoOlc8zz7+dq9sGGu0
hUPCLiptfBdIu73l0XqMd+xdGprl8hMdpH0CyKhAqTpv/00cmFobFwm1Fbf146hb
YAhyP6rIzDlrhvYFe3sFwAIjXQ0qyN+ffm/Ot1iFdYER24sl63XfwBPS97DwO70p
4jtbea8zlN58CFmTTK899J1f4MGbzvMyttdHG+WjhLNplB7fhtBdiHes2EdQws2S
TKbK5D/69OYXSVCwimcOnlklcJ1NpQJFFaWeKQIDAQABo1AwTjAdBgNVHQ4EFgQU
cu65A8EdrKWjFy9PZSRvSu8+4G0wHwYDVR0jBBgwFoAUcu65A8EdrKWjFy9PZSRv
Su8+4G0wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOCAQEAl3lAgKb+3NQ/
+a+ooaML1Ndmh7h+UWl4bXx1TXwaLAi0iIujrAa3AM86vqKxFeCwZC9bAPEyGQrH
AF8JQbWAa2SckSDPSxM1ETV7EtJS4plaSfWxzX/m8jtd7D5RbzyE/qUH5JsXvCta
rKOMJPNvSfTuxQMX/Qyp0cHZUr/3ylUhdLWYsNwTAlQgx0OK8w+zWx6ESCM52Cz4
Gqjpgcq6qylE2RoNmY0L+xb1B0YS+fslcjSXJZ/Z1j9mVrUM4wuekgcIxJfUrfhv
/957d4I04iMp6F/XgrrKUewCGiifcDi87nwoqHJwSIWG33LTb4e8mSe4Y83Fh8L2
KWQDqcnYug==
-----END CERTIFICATE-----

================================================
FILE: pingora-proxy/tests/utils/conf/keys/curve_test.384.crt
================================================
-----BEGIN CERTIFICATE-----
MIIBnDCCASOgAwIBAgIJAJ8dDVMCYWE3MAoGCCqGSM49BAMDMBsxGTAXBgNVBAMM
EG9wZW5ydXN0eTM4NC5vcmcwHhcNMjMwNDA3MTY0NzEyWhcNMzMwNDA0MTY0NzEy
WjAbMRkwFwYDVQQDDBBvcGVucnVzdHkzODQub3JnMHYwEAYHKoZIzj0CAQYFK4EE
ACIDYgAENKtL8ciBDxA9G2auTbtbteNu8DI7gp0039+J6Z29laQpHLMw8MH7Wegx
HTv9RTXcf1sTCBloZh8qTvZTDh1yi7kjhZ2yLdVEVoakC5HBKvWzo1ewjSkOfBX7
LF4p/8ULozMwMTAvBgNVHREEKDAmghIqLm9wZW5ydXN0eTM4NC5vcmeCEG9wZW5y
dXN0eTM4NC5vcmcwCgYIKoZIzj0EAwMDZwAwZAIwL8ad/dyrC62bFC7gGZkRzaTm
r2XlaMk6LB02IbVJgQytu+p50pnAgELVXISLP8LIAjBAjQ71pDbCjfg8Ts6iOnWH
p4R+Z2BjbTZu+Kmn1x8nyo2OJcchRYTRAKMS7YWstIk=
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/curve_test.384.key.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MIGkAgEBBDCWPID9PlALCL+dNPdlEBw2fP4cU56akYDeV08fpY+DkhaJicPxAilY
2T68Epv7nh6gBwYFK4EEACKhZANiAAQ0q0vxyIEPED0bZq5Nu1u1427wMjuCnTTf
34npnb2VpCkcszDwwftZ6DEdO/1FNdx/WxMIGWhmHypO9lMOHXKLuSOFnbIt1URW
hqQLkcEq9bOjV7CNKQ58FfssXin/xQs=
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/curve_test.521.crt
================================================
-----BEGIN CERTIFICATE-----
MIIB5zCCAUmgAwIBAgIJALxqm9BrQU12MAoGCCqGSM49BAMEMBsxGTAXBgNVBAMM
EG9wZW5ydXN0eTUyMS5vcmcwHhcNMjMwNDA3MTY0NjU4WhcNMzMwNDA0MTY0NjU4
WjAbMRkwFwYDVQQDDBBvcGVucnVzdHk1MjEub3JnMIGbMBAGByqGSM49AgEGBSuB
BAAjA4GGAAQA9LXDr66Cx/DZYnSacGu0FxlSx/e7xTm49g2QGU7TkO8TEyaOkErl
IaqJE7YxQp+CUMfelVVkUJmVlJ4Fkrl3nR4A3YLDjEYihXnuLZajbwkjC7vzKO8A
O2ln8R5JSzClUoTu7s2nok7tw/6dP4i08YPk4Pkxm5NHIok0uFmoaJpdkq6jMzAx
MC8GA1UdEQQoMCaCEioub3BlbnJ1c3R5NTIxLm9yZ4IQb3BlbnJ1c3R5NTIxLm9y
ZzAKBggqhkjOPQQDBAOBiwAwgYcCQgCdVxTjVAPCIouh1HH4haJDpS1/g30jcTj6
FGvyxofIX4Q6fO3Ig8DlJa+SrDq2f75/f8RSC71NB6peNjP8IARCOAJBKEMcXjK5
btvZxg+puzyxuMNRtUUk/Re/pzzLJbi7o6MWVNgLQJ3d9kUVHzbQEXNiUe82vbYK
uairSMDS6Dl1j/A=
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/curve_test.521.key.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MIHbAgEBBEFiMUgbEqjcf3K4Ba+CFUv20+ryJq9REjWUkoi9AgkpGuEAqLQza3CM
kSGSiPdm9gWmpeLlCExPVJRbcTmAhoZUcKAHBgUrgQQAI6GBiQOBhgAEAPS1w6+u
gsfw2WJ0mnBrtBcZUsf3u8U5uPYNkBlO05DvExMmjpBK5SGqiRO2MUKfglDH3pVV
ZFCZlZSeBZK5d50eAN2Cw4xGIoV57i2Wo28JIwu78yjvADtpZ/EeSUswpVKE7u7N
p6JO7cP+nT+ItPGD5OD5MZuTRyKJNLhZqGiaXZKu
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/ex1.crt
================================================
-----BEGIN CERTIFICATE-----
MIICtzCCAl2gAwIBAgIUC8kzFXZNRqjR158InTieHg1VrWowCgYIKoZIzj0EAwIw
gY4xCzAJBgNVBAYTAlVTMRMwEQYDVQQIEwpDYWxpZm9ybmlhMRYwFAYDVQQHEw1T
YW4gRnJhbmNpc2NvMRgwFgYDVQQKEw9IYXBweUNlcnQsIEluYy4xHzAdBgNVBAsT
FkhhcHB5Q2VydCBJbnRlcm1lZGlhdGUxFzAVBgNVBAMTDihkZXYgdXNlIG9ubHkp
MCAXDTE5MTIwOTE5NDgwMFoYDzIxMTkxMTE1MTk0ODAwWjCBgTELMAkGA1UEBhMC
VVMxEzARBgNVBAgTCkNhbGlmb3JuaWExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28x
GTAXBgNVBAoTEERFUiBpcyBGdW4sIEluYy4xETAPBgNVBAsTCEVuY29kaW5nMRcw
FQYDVQQDEw4oZGV2IHVzZSBvbmx5KTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA
BJSBMLYEVPgjmd2vWgMpN9LupZa56T7Ds1+wAlyMphLDN56PWuphsrNsEwiIIeNv
MtRTPRuoiBkfvMiWON6nkGWjgaEwgZ4wDgYDVR0PAQH/BAQDAgWgMBMGA1UdJQQM
MAoGCCsGAQUFBwMBMAwGA1UdEwEB/wQCMAAwHQYDVR0OBBYEFOYNuOCrYKnTFIEV
ck5845y/yZHkMB8GA1UdIwQYMBaAFFZRXwepqUwm9Kh+repV7LkBDnEHMCkGA1Ud
EQQiMCCCCWRlcmlzLmZ1boITd2VsbGtub3duLmRlcmlzLmZ1bjAKBggqhkjOPQQD
AgNIADBFAiEA9XAQ1Xi4Lav8LKzXZMSOHHj21ycqf3grnUfKJ6iwRvkCIDevfipo
qIuR/Dnt1bBoXxFKv0w/LpH/89jIohUQwVSc
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/ex1.key.b64
================================================
AAEAAJIx7XoAAAABAAAAIAAAAAAAAAACAAH//wAAABAAAAAAW66OYKnvlI3LQETZc85HajCUyhsAAAAAAAAAAAAAAAD+EOoVAAAAAQAAAKAAAAAAAAAAAv////8AAACQAAAAB018vkpfL1Bmrc2c9A5NcT3M3EdG+ZQfTZGN4BHUIpzOXK85cESryj5aFHIOh37fuRZlcCO8i9G44x+xNE45M9nw7tI2D4Sf1zraq9titAqMj3I+I3CZW2LX61CHyMYlfdxG/F7OR7dz1kbUcJeP73l+v65cPIEwek6gzvTZOIz2W8AnFdc0jW3iZFcgAhPmJzkBs4EAAAABAAAAMAAAAAAAAAACAAAAAAAAACAAAAAM0IInmYQDB4EBkHw182qCs6LncTgAAAAAAAAAAAAAAAA=

================================================
FILE: pingora-proxy/tests/utils/conf/keys/intermediate.cnf
================================================
[ v3_intermediate_ca ]
subjectKeyIdentifier = hash
basicConstraints = critical, CA:true


================================================
FILE: pingora-proxy/tests/utils/conf/keys/intermediate.crt
================================================
-----BEGIN CERTIFICATE-----
MIIEjjCCAnagAwIBAgIUGZ1/e3L6KJLlioDsIF7mOiBUO+UwDQYJKoZIhvcNAQEL
BQAwXzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJh
bmNpc2NvMRAwDgYDVQQKDAdSb290IENBMRkwFwYDVQQDDBByb290LnBpbmdvcmEu
b3JnMB4XDTI1MDgxMjE5NTkxOFoXDTM1MDgxMDE5NTkxOFowTjELMAkGA1UEBhMC
VVMxCzAJBgNVBAgMAkNBMRgwFgYDVQQKDA9JbnRlcm1lZGlhdGUgQ0ExGDAWBgNV
BAMMD2ludC5waW5nb3JhLm9yZzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoC
ggEBAPLP2xGvLbhFS3iD7GyRk6e5Kc1FEb/0be39kL5POBGBl7SatwlIXzvc2RSW
3Gla7ayvRiWo1WXDzuNv2M3Zu/uFmHkBa550Q2hWzD8StuWuDLVS79LVFcYOCE7r
35fXZDAS1H63flgX5kxjuIGyR6gnZMrqLESoyYUpP+G6b3chv98n+ecCsVWNeFkx
qxZibz+oVHfx1OoOUZSvvlQAt/5jhfKtDbCvUl5uz8VyxY4QWDUY92wx5JO+fbaA
H0UCrb5MPCutqeKrtVInomRKzs+3pSAKLyDbhNGJgeCI99a1cFRt02Uh3tssQbVS
oBJ0P8ktcdlshS3HuM5zOcrc738CAwEAAaNTMFEwHQYDVR0OBBYEFCMHGxO+uP5T
CjLDHk4ZFAMrY8l5MA8GA1UdEwEB/wQFMAMBAf8wHwYDVR0jBBgwFoAUnmFHSh1B
g61ixjc/XxYgLZvIjWgwDQYJKoZIhvcNAQELBQADggIBAMJy3c/J36teAqy26+6N
oBRAe+9v8eTwg7hf6ZjaK8mhakc5AyIi5yTVdaW4CxZp1P934fLExCclwlY4GgUC
6kaCpCA/E47q4mOkhQcMu/V2Kfq4SQ7yZzEfc7OOwnSKO2iyEiAiOqg8B6w6Mwt4
eKZ5AdiKTst1BG+rwIPayMekIcJ2Sg4DB9qGCCTQutgEdu8sE+/znZIocJypxAif
WoZTnNmp7J96cM5MSBN1NNRT+xWgqsZWPMV+qaxRRU9e6TRpSJVKVBkSUdufXl8L
M8d0D/ypv5dwhNQuuVhLEVqSGdzSDO0CyQdZNFVO2/DMgAfYaDs+ayh+93GSM4Ey
Cd6MQE583WjW2KzHbzykBZXj/FOoAw+HKvFhv+aG4GDO1xoyY/ATQBSpoUIlmYRm
zU7TNGQ4osExHE057S2eP4suVFwcJWhjcevgHSYxUz1Z/5RkSDvuz9rftAmNUWU+
SXaKQD8TpY7h/qBvybRxxwJ8BdAJbIO7S5zuZbH6AsECnSFTME6nRow39VNon5Lh
jMRbpNQn55e8yxSmgntq3IDj/1KO7p14cCKCeUW2CW4C0LAZiDp0+KWIXIjNgBZ0
mcXdvMXWgGkWten6bdgkFH3v5P0b+Ow0fEGTreFuvpxjaLMRDZ9uImjekZig1/R8
+LN8cTRARqfdw25RY23Xb4yF
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/intermediate.csr
================================================
-----BEGIN CERTIFICATE REQUEST-----
MIICkzCCAXsCAQAwTjELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRgwFgYDVQQK
DA9JbnRlcm1lZGlhdGUgQ0ExGDAWBgNVBAMMD2ludC5waW5nb3JhLm9yZzCCASIw
DQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPLP2xGvLbhFS3iD7GyRk6e5Kc1F
Eb/0be39kL5POBGBl7SatwlIXzvc2RSW3Gla7ayvRiWo1WXDzuNv2M3Zu/uFmHkB
a550Q2hWzD8StuWuDLVS79LVFcYOCE7r35fXZDAS1H63flgX5kxjuIGyR6gnZMrq
LESoyYUpP+G6b3chv98n+ecCsVWNeFkxqxZibz+oVHfx1OoOUZSvvlQAt/5jhfKt
DbCvUl5uz8VyxY4QWDUY92wx5JO+fbaAH0UCrb5MPCutqeKrtVInomRKzs+3pSAK
LyDbhNGJgeCI99a1cFRt02Uh3tssQbVSoBJ0P8ktcdlshS3HuM5zOcrc738CAwEA
AaAAMA0GCSqGSIb3DQEBCwUAA4IBAQBwlDK5c3FCzI2+d81n/ChiV83gRwZdBXZA
xAdWDPvAaNkjig5xxX6h3D1lakdMBIyCZCU3Ln+I1wJ38B+uuKt4wpoYmefR7Zw2
MtKD6IyiZdbMRN9/eV94pO8swh9SuUlTQhHU1Em0VRGVzBJrMTjh+wtKV4nm/6+6
PBUzZARrBI9qOBO+WOvN6XnvjXmb05D4lEdZ1NvLIKm/r6Nkq+bKwLoAMvc/u/lS
EcPLzAAa2fnzcdMhc1x1OLAp/+Dl3IDuQJSstqqUgWM59+3Nc1OBUaLYgIoq0W+W
T96ilOxlS02SSLwaeWXqwEnYTJe/8JVYX2HXW42TdUwbAEdyLIa/
-----END CERTIFICATE REQUEST-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/intermediate.key
================================================
-----BEGIN PRIVATE KEY-----
MIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQDyz9sRry24RUt4
g+xskZOnuSnNRRG/9G3t/ZC+TzgRgZe0mrcJSF873NkUltxpWu2sr0YlqNVlw87j
b9jN2bv7hZh5AWuedENoVsw/Erblrgy1Uu/S1RXGDghO69+X12QwEtR+t35YF+ZM
Y7iBskeoJ2TK6ixEqMmFKT/hum93Ib/fJ/nnArFVjXhZMasWYm8/qFR38dTqDlGU
r75UALf+Y4XyrQ2wr1Jebs/FcsWOEFg1GPdsMeSTvn22gB9FAq2+TDwrraniq7VS
J6JkSs7Pt6UgCi8g24TRiYHgiPfWtXBUbdNlId7bLEG1UqASdD/JLXHZbIUtx7jO
cznK3O9/AgMBAAECgf8B6w+uUkzJxuplRHNE1Oi5Khiy0p+dJK2DCITD7scuXmVg
NVea20j3AWb7wUKxaWt82CdrFdnEPb39zv2u2T9XEFmRFgmuDB1vfUMn4N2LInFq
YYa9Nwwb7mkFOWKEdEFPshEIuKCvABoA+w7TTgzf4jNHM5dcPakPbRd4LVNCiLbg
oDjExmPeQ3t694vieLpm0M/h1HlQ8WbvaaCT14lUbTKUH/fo/4wgPMfIO8W205Pm
gj6rNSYhiK6f4OZp9i3ANZwZxarB2H3pXELda8EZQLoFD7s7mslQ+Lk8z3oy11Ov
5olskRj8HROWwikHXikG+nnToEZWsDp6LpC6+xkCgYEA/l8iw9ORQvtnJIEpH2mS
Oclk/5kYpkxRk5XXfXDxGvf9sdYTArvQoHBSS9HCJSE7aNnjRFXMotUYxZAtc36Z
LkrAn0hfyiip2f0sxKCnlRB5I0DF120u4uLi2mhGnOBX20uQv1Vj0+Y9TncUWTQx
EVKCCs9ytZnlOvdcUE7H4scCgYEA9F3GotyzVT9T8MPni6j/iPFyeEg7CRb19dgp
LwlAjmv5MA23Ok5TKCjOYqISSyVokEaro/pbfpp1amdzJ9ytBTOIeFJ0QVpskrmx
XoKL2YZW2Qk0IAotYhBbLmi/5YPe6zfek4O9rQubd7lR1FauIQxa1it7ljaVREbc
lScj1YkCgYAFuZhrteBIFKZuoOWPCm47FLhMNGLko0UWwEGYVilnBPvVu86zugxo
//4qLK9k7ImMw5Kk4BV5+LfVAnizZ78E1rPdIeDeCOpBuLwANOlwpm1DiNqrDY8H
ljmq1rv4Heh8TAgW9lIH29+3W2C+3TjZffTlT2PyiGMrX5PZTtya0wKBgQC7UzPV
Xzg+LjirxZG3VwrksKpemIhg9HACUP1pKD+Lrius8aa3FJncnENyCunZH0kj6Hjl
UCNZTxCZS8pUEW+1IAcKrbKe8rFuXNkiKRMJ4lirMcn6kbKujPlI/1WznL6DNCX0
kTYS9GXuhmq7SuNbRDxSF606vob4exXXZNSseQKBgGOjaYiivK2kgiq5DOJOsGF/
jTgiHzvWDWqWmTUo2ATmIfhb45j18KTnosZKFvcSTIOfeNOHgBHNtCEShbmK3+zW
eVKmBeDZxePesonL2IHc1BHkF3PpQCm/p0nUBcuAtWXsW8Moj8sNh9xxAV5l0Xuj
9roQeE6VmidUjodZ4tTi
-----END PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/intermediate.srl
================================================
199D7F7B72FA2892E58A80EC205EE63A20543BE5


================================================
FILE: pingora-proxy/tests/utils/conf/keys/key.pem
================================================
-----BEGIN EC PRIVATE KEY-----
MHcCAQEEIN5lAOvtlKwtc/LR8/U77dohJmZS30OuezU9gL6vmm6DoAoGCCqGSM49
AwEHoUQDQgAE2f/1Fm1HjySdokPq2T0F1xxol9nSEYQ+foFINeaWYk+FxMGpriJT
Bb8AGka87cWklw1ZqytfaT6pkureDbTkwg==
-----END EC PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf.cnf
================================================
[ v3 ]
subjectKeyIdentifier = hash
basicConstraints = CA:false
keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment

================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf.crt
================================================
-----BEGIN CERTIFICATE-----
MIIDiTCCAnGgAwIBAgIUI/kgcBZtqXK/VnK2VZ47mS6DETwwDQYJKoZIhvcNAQEL
BQAwTjELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRgwFgYDVQQKDA9JbnRlcm1l
ZGlhdGUgQ0ExGDAWBgNVBAMMD2ludC5waW5nb3JhLm9yZzAeFw0yNTA4MTIyMTQy
MjJaFw0zNTA4MTAyMTQyMjJaMFMxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJDQTEh
MB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRQwEgYDVQQDDAtwaW5n
b3JhLm9yZzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAJ/2h9zWB+JG
pvOxJsQeUXr/IJtdtHagvNkRL8hvOFNIfazwXZ2bcbCO0GVVx8CfCFIH8nPpKQsU
xnXQDkrZWHCj2jxATVOUT0HzDbxkq7Um+IicReOVy0pL5thKjdRc4pwJey4g1YsH
COXtjgcSjo7at9hHexVO1QFOkMT2c4kkzl6OIiLT2Lq3HfZ9ftdC1/4lNnlu97zy
CeDR0woyoj/4ELXnsWZQxFWpc5Jh5SM/8vkOkeWWGIuY0SH4t/Km/5S8GTfxNdIA
V2miOQhRC1ocQA3hXlOw41PTYk8mLh33YTTc/xWJ7d6BIosDpvK1HnTIyiPzB2io
CTIq2xsn0+ECAwEAAaNaMFgwHQYDVR0OBBYEFEDWXZUM1tkxzrRgt8rsCU6LmaH2
MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgTwMB8GA1UdIwQYMBaAFCMHGxO+uP5TCjLD
Hk4ZFAMrY8l5MA0GCSqGSIb3DQEBCwUAA4IBAQBzlbtg8H+kVhWTXKcSZ5qfMI9D
U6uLSL2OKmpuIe0N3qwtiGO3PLtG7cyqX3I//PCg86/YrHnFf2yDRQ4IGuxoDJUf
APSiKFO+44zRtgHBZffckBTil1LfyvePUgVxEDObV2XUyDiNmABmZzBOVXfrAh8X
FbuGutOq8PiWX7rauoX5tw9tM2RRTdbxtlHfIPmcfj0nhIpIZmkLUZFaRkYaiaoE
3HUpzcOqnBw3A5rbAP+buY7kxVslBDKCwKZeDMQooLEAOCvUKz4iHSRj17Ryj1aG
UoRKI3XGQz6C8CbKMlI93EDcWQNSrvqve3TR/AIj9g/fiwVXvKQJs4CkjWEi
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf.csr
================================================
-----BEGIN CERTIFICATE REQUEST-----
MIICmDCCAYACAQAwUzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMSEwHwYDVQQK
DBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQxFDASBgNVBAMMC3BpbmdvcmEub3Jn
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAn/aH3NYH4kam87EmxB5R
ev8gm120dqC82REvyG84U0h9rPBdnZtxsI7QZVXHwJ8IUgfyc+kpCxTGddAOStlY
cKPaPEBNU5RPQfMNvGSrtSb4iJxF45XLSkvm2EqN1FzinAl7LiDViwcI5e2OBxKO
jtq32Ed7FU7VAU6QxPZziSTOXo4iItPYurcd9n1+10LX/iU2eW73vPIJ4NHTCjKi
P/gQteexZlDEValzkmHlIz/y+Q6R5ZYYi5jRIfi38qb/lLwZN/E10gBXaaI5CFEL
WhxADeFeU7DjU9NiTyYuHfdhNNz/FYnt3oEiiwOm8rUedMjKI/MHaKgJMirbGyfT
4QIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBADtr4Eb3FnNjfHI/3uKbdCOgskqS
fRHerB4ctw7X+Cns26HwcOJCea5xnWP4JDBCvFmsyTAcwvdSRqyac1QNeUAkNBEV
nDPf2PqkYZCLxurRM/gufBiLrWuZYr1ISU2WKV5O5SW6XA6FD8lbzYHxylHAD2+5
K7rLw98rOqd0lqkgbgj89fAonws/ptV5FX4PgwDbfmn9kft2hytmk+BR/kOj77Qc
SKBGNCtCulYACtRbuBaQueRCyMF/9G9s1+fn768Ecub5cQobvVSToPvEdGB15E4f
yttahXtu3wxIQ9zAtd4tHhUDa2aozLxOnEOt/YOoLAHq7t6Rz4Azs+LnfeI=
-----END CERTIFICATE REQUEST-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf.key
================================================
-----BEGIN PRIVATE KEY-----
MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCf9ofc1gfiRqbz
sSbEHlF6/yCbXbR2oLzZES/IbzhTSH2s8F2dm3GwjtBlVcfAnwhSB/Jz6SkLFMZ1
0A5K2Vhwo9o8QE1TlE9B8w28ZKu1JviInEXjlctKS+bYSo3UXOKcCXsuINWLBwjl
7Y4HEo6O2rfYR3sVTtUBTpDE9nOJJM5ejiIi09i6tx32fX7XQtf+JTZ5bve88gng
0dMKMqI/+BC157FmUMRVqXOSYeUjP/L5DpHllhiLmNEh+Lfypv+UvBk38TXSAFdp
ojkIUQtaHEAN4V5TsONT02JPJi4d92E03P8Vie3egSKLA6bytR50yMoj8wdoqAky
KtsbJ9PhAgMBAAECggEAA1e6ABXi5UoXrAj8J+YASuMw8b40CrSSLbELwBL+6NKf
ebEuK6B3cDqTxUJVIcPQ/zHWUbDCIE6nVQfrfIntLLFn2pF3bDMxss2a8GBkLC1r
zSMC3N4g+OT8JnHsY88rFxqlndGm1LhpabCcoq4zF24foF/iBRB4KAZVxR/nSyrW
xUvDwTRoz+tdUSyvIOIPpmpqMV+G8UFTgH54AKc3PW5uwJn3gqcDYC/kKCAaIefD
ARfjGRAzi5VO8MJiEMD/3vu2nx3DQyZqINIANQqwEtCBM2RE+hCYbUm4/ln+VLed
DmZwU2eX/19hluPcWDAblhTIZ4aPj5mQNj2TQXFDfwKBgQDPui3/qLC1ivuLAJas
D8zmZLWOa3vYOROyAOBl7vor9ocXBoSAxqCbXjByzgNxe+5SHw+cyRQ0qAc9dMFd
YvhHcpPVst+3dnZAccE9CLRDSQSRoWD8IGL70BTuN8+ju+qxfdYo8HTG15rARy9e
ereibDaa3Ef9nUQAg5g5dPzH+wKBgQDFItOlzQKD3Gwtoy9jiyP4DYHcnw8yb4lV
8hSpvf7Qc+9+Bly4T/VRliA9nAmJnc4j6o87OB30wLvRiaOeZB2jex6189iAteME
I1DbRyHyf9Vpd1lXA/9lPxzky7o7vWABfntAWZ/3RdMLMtP1doDwLMQw55fEsaoq
7TvlmD0A0wKBgQCqs/TZA2czyOKtd+5ZtyJKsrgAMZO0PDNTNCUznw820YByC4kX
yiJxixWFQobR22YdVikeTp+sJejNOAUvGQWusRmLo1L1EQRcMR77aQu5v2dhxZxN
lM/C31xT5slbZDGZai9ztSZBwSwKlnT2zyHY99Rnrl36rCIVyg5uKRURwQKBgQCX
5gfzH46qj/ODDtR6/UGP5siDeMQ69pp58Phe+pkXgd2t27UiB+pdHTJmho8KzN+D
6T7IQKtEZiXShR3f9ACqcTnutZ/DPWNZUuUAsUTFGB8XDvF2DQyDtSfMW/Z6Baeu
Pwk1QlnyLIk7fcS4xMEBT1002Z6l3sfiH74hYTbQJQKBgQCCUmG2y8wjDP301VCx
8/bSHZCTYdDnjbr9lyXnZ9LsSmW7BX7KwRltdsaxu6IsZkL0ACm2en2jwW4BIAoq
uTsy5D16zqdInh0yKfUN6XZEK5BFWkD5E54m6NtP+YCaN0hLQlAcG7Cl4Epp/otL
MDSZFlFnjA1dtFcj4nX2FD1jJA==
-----END PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf.srl
================================================
23F92070166DA972BF5672B6559E3B992E83113C


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf2.crt
================================================
-----BEGIN CERTIFICATE-----
MIIDiTCCAnGgAwIBAgIUaU54M+5hDvm7AXbOOmnbYT/A40cwDQYJKoZIhvcNAQEL
BQAwTjELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRgwFgYDVQQKDA9JbnRlcm1l
ZGlhdGUgQ0ExGDAWBgNVBAMMD2ludC5waW5nb3JhLm9yZzAeFw0yNTA4MTIyMTQz
MzBaFw0zNTA4MTAyMTQzMzBaMFMxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJDQTEh
MB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMRQwEgYDVQQDDAtwaW5n
b3JhLm9yZzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAPRZmAykZFbE
hIvUC2SDFCN4eV0TzHlk6735U6Goo/AL8JadeiWEEXg3EG2PnMA1u7G7DU0PG8aw
h54X4iIUDosv/pDQ9MhcHyQawoxlf+bHwTD+5YQHzLJ+hbfp7nzsak5ZE4rq1ZOt
Hd856MKEZv1O2WvUZwfZiC/5Gmv9b8pmgJHbjviwFtDKgJOCjgnkkV2kJ6H7KEww
I2b/+aLtZGABPu57P1/rVtXvR4YSYmzF/VuLCmlpun3cQOK/5T8o4oi342dXn0EF
Tq2/xjLRe1/g+syUi249sq+h5TSeXHTkJs3Yo7/KxsitSFTOz+MuFLnDBgXRebvU
EBicU03yPxUCAwEAAaNaMFgwHQYDVR0OBBYEFJVgFA+/ZHqcz7vEDg5CaivXc7JF
MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgTwMB8GA1UdIwQYMBaAFCMHGxO+uP5TCjLD
Hk4ZFAMrY8l5MA0GCSqGSIb3DQEBCwUAA4IBAQCW6uv2AETVq9tEWo3L0Gg+b3il
DVOf2JuZ4UlVfhXoCOdflU2M2CEyBcmTqcZlEIFJhYNQzOOvOYG0IlVzrXdP6yHx
/zj16d1+b5eLJhBPYeCMqBoFoTtnB8gE3BVp+9fkB+jD0/FkOPVE8RUZY/8mcFvd
ff58oB0bGDPLZkziH0kMsymlEpYbs2cTVDcvcoviblVRa4Wt35yTWyLTolxT4NTU
iI4yUKpx3xkGpOZTg26EXnrtLjAhGxdXpcJ9PLd6ClG81wiHZfhyshupoM4ad8lQ
qvanYlaEkdzjzjr9Tw35HYEbQk2POarsjmd4yW9E5ER+xj6ia/lcyc4h/VFB
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf2.csr
================================================
-----BEGIN CERTIFICATE REQUEST-----
MIICmDCCAYACAQAwUzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMSEwHwYDVQQK
DBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQxFDASBgNVBAMMC3BpbmdvcmEub3Jn
MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA9FmYDKRkVsSEi9QLZIMU
I3h5XRPMeWTrvflToaij8Avwlp16JYQReDcQbY+cwDW7sbsNTQ8bxrCHnhfiIhQO
iy/+kND0yFwfJBrCjGV/5sfBMP7lhAfMsn6Ft+nufOxqTlkTiurVk60d3znowoRm
/U7Za9RnB9mIL/kaa/1vymaAkduO+LAW0MqAk4KOCeSRXaQnofsoTDAjZv/5ou1k
YAE+7ns/X+tW1e9HhhJibMX9W4sKaWm6fdxA4r/lPyjiiLfjZ1efQQVOrb/GMtF7
X+D6zJSLbj2yr6HlNJ5cdOQmzdijv8rGyK1IVM7P4y4UucMGBdF5u9QQGJxTTfI/
FQIDAQABoAAwDQYJKoZIhvcNAQELBQADggEBACYNZWfEc+aOk6XOnITfeiCSMtub
XFROVevgZ0GBJa11Ehvgra5bMZQqQVaWJHUTE+3xbAmqbXwEbV589MzvLO+67SFa
SQOebAccMf5Tp0OBS4jpq/1w7PH1RjN2IFdH4HoXUyAA0STEbyXukxjKSQooSMl9
NpunpJY4fueKit0Fly/AHmBBlTosk3fwR6PU5wLRAv3UlG0x2XQBS9DksZmHWtRN
nGPp1zDMnG56R+Np5hR64R0b5zVRsXUK5qo/6aocdJp7wwWqba456AMEcoMfW6DQ
d6fQ/+o6rug9Lyog+yUzpKlJWbsJ34+j8neJuhvlBkgA3PskA2drglapH44=
-----END CERTIFICATE REQUEST-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf2.key
================================================
-----BEGIN PRIVATE KEY-----
MIIEuwIBADANBgkqhkiG9w0BAQEFAASCBKUwggShAgEAAoIBAQD0WZgMpGRWxISL
1AtkgxQjeHldE8x5ZOu9+VOhqKPwC/CWnXolhBF4NxBtj5zANbuxuw1NDxvGsIee
F+IiFA6LL/6Q0PTIXB8kGsKMZX/mx8Ew/uWEB8yyfoW36e587GpOWROK6tWTrR3f
OejChGb9Ttlr1GcH2Ygv+Rpr/W/KZoCR2474sBbQyoCTgo4J5JFdpCeh+yhMMCNm
//mi7WRgAT7uez9f61bV70eGEmJsxf1biwppabp93EDiv+U/KOKIt+NnV59BBU6t
v8Yy0Xtf4PrMlItuPbKvoeU0nlx05CbN2KO/ysbIrUhUzs/jLhS5wwYF0Xm71BAY
nFNN8j8VAgMBAAECgf9eLNRtYEP2gnHox9DxlujWwu1Y8kiHK7OwLxK3O51I51Eo
EN8C6+PPxr6OJiDunnG4uQm8qWtgff5xmsLiX4M7d0P7Nzh2AGCq3vrHIazUmtMw
Dw271UW6MF6ug3q8qwxN0LG3g3V4H+tjcu5CtMT83BGa0uzixEm43klQqwfATwBy
43w+JHZYxvWo6sxF/Pf24XJzalB9UKOBFbOp1wFRmHwrGwVVwwq/u2yzE848Opat
KMEeAT3SZQh0Km6zmZ+/32Xir0p4QOxhkaBxE6uFQVcvcahZ0ovTHQPCqMjfvr3s
LRRNqDVS24KCJY67+Feb1heKu4GKI9czoAH0QAECgYEA+9dp+NDOpvNxrv/VoL/S
zSsVosILGLBKbnmnLsKRN+ahhX79uGy49AHIMcKsCOK5assfrl1aoJHL3IHZmcEB
wEp8XkK3bOOCJJb0jlj9R0Ejh0Ext0A2zXGhs52JR817C/NIDaGeHPh4v25HZKep
ocZ7JWe96oDDluwHaCDibgECgYEA+GKDDgf7SIqQPWp7ToZM0G3CNU/cxzUTNkLS
NC2oRt+ZUIjE0eo/Puihcgx5y8FPn8hs1iQI+zyTXPzspOsKnQL/djP6jMZ6XFWE
R2Qcj/m+rTZSzcsNN3SBebOald1jLr48sIyyQTb0Gh0KtQ4KTf0yKuEgaHiF+ScM
l4LhORUCgYApoNzqfRF7tUf4Zl+Yl7yvn0yPP8X3ycQz6LYC27SHaf8PAwPLhWU5
KEZAO26WdWuyxGqzNskxO4hYJbqjWK0CbQ2LwzlwrVao168LDJipO5I03EjsgpfM
c9kHyKWVkdiiDA+/+RQas9O5yO/SKoi2rglTEIfrCGfMPa2nv6/OAQKBgQDO6ino
z2dat+uO7hyIfsKQw06M4Nm3rZQymJnJ09siJ3TtrPHhOPW071BG1PPFdGVjYzCf
d2dv+7d7OEve2tp9kBjGHGj1SwZ10tueKVzN56wbWWzDeQqqjsipXKBDhijwsJOY
M6zvPNs+wcDAsVCORYW8SMyZmwVoWEpaETKUPQKBgAV5otiYpAzuQNJGw93a3xzc
PKMtGaFbPmeJ+F6WCneU/Qq1bYmWHe/ycQGB2S8MucWiBkY552ZGjJ/hrSBTVgc1
cBlosE6qkkEbGG2ektvU+W4AjedvT3S4wPdfmOCkbZe+OVqS+4akgPjCfXpM6/eF
HhtAJlWFZFHDbq7M3Y0E
-----END PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/leaf2.srl
================================================
694E7833EE610EF9BB0176CE3A69DB613FC0E347


================================================
FILE: pingora-proxy/tests/utils/conf/keys/public.pem
================================================
-----BEGIN PUBLIC KEY-----
MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE2f/1Fm1HjySdokPq2T0F1xxol9nS
EYQ+foFINeaWYk+FxMGpriJTBb8AGka87cWklw1ZqytfaT6pkureDbTkwg==
-----END PUBLIC KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/root.crt
================================================
-----BEGIN CERTIFICATE-----
MIIFnzCCA4egAwIBAgIUE5kg5Z26V4swShJoSwfNVsJkHbYwDQYJKoZIhvcNAQEL
BQAwXzELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJh
bmNpc2NvMRAwDgYDVQQKDAdSb290IENBMRkwFwYDVQQDDBByb290LnBpbmdvcmEu
b3JnMB4XDTIyMTExMDE5MjY1MFoXDTQyMTExMDE5MjY1MFowXzELMAkGA1UEBhMC
VVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNpc2NvMRAwDgYDVQQK
DAdSb290IENBMRkwFwYDVQQDDBByb290LnBpbmdvcmEub3JnMIICIjANBgkqhkiG
9w0BAQEFAAOCAg8AMIICCgKCAgEA4s1XxwZruaRwuDX1IkM2oxdSdjg7FeUp8lsN
Uix4NdXz8IoQWRzCfFuRBKFHptahutSO6Bbewm9XmU2hHG7aoCqaZqEVQ/3KRLZ4
mzaNBCzDNgPTmDkz/DZKzOVuyVvbmTOsLn53yxKnFP9MEDIEemqGiM80MmFfCm/o
0vLkjwkRpreMsWPUhrq3igTWRctUYMJAeDsEaaXB1k5ovWICrEylMzslgSNfoBed
NmBpurz+yQddKNMTb/SLYxa7B1uZKDRSIXwwOZPdBDyUdlStUPodNG/OzprN+bRC
oFRB9EFG1m5oPJXQIalePj0dwhXl/bkV4uRxCSZmBZK3fbtLMF+Wkg2voTrn51Yv
lKkzUQoEX6WWtUameZZbUB8TbW2lmANuvGBmvBbj3+4ztmtJPXfJBkckCeUC6bwC
4CKrgB587ElY357Vqv/HmRRC9kxdzpOS9s5CtcqJ3Dg1TmLajyRQkf8wMqk0fhh7
V+VrPXB030MGABXh5+B2HOsF307vF030v7z+Xp5VRLGBqmDwK0Reo2h8cg9PkMDS
5Qc2zOJVslkJ+QYdkea1ajVpCsFbaC1JPmRWihTllboUqsk9oSS3jcIZ8vW3QKMg
ZbKtVbtVHr3mNGWuVs96iDN5Us3SJ6KGS8sanrAYAAB/NKd1Wl3I0aVtcb6eOONd
edf9+b0CAwEAAaNTMFEwHQYDVR0OBBYEFJ5hR0odQYOtYsY3P18WIC2byI1oMB8G
A1UdIwQYMBaAFJ5hR0odQYOtYsY3P18WIC2byI1oMA8GA1UdEwEB/wQFMAMBAf8w
DQYJKoZIhvcNAQELBQADggIBAIrpAsrPre3R4RY0JmnvomgH+tCSMHb6dW52YrEl
JkEG4cVc5MKs5QfPp8l2d1DngqiOUnOf0MWwWNDidHQZKrWs59j67L8qKN91VQKe
cSNEX3iMFvE59Hr0Ner6Kr09wZLHVVNGcy0FdhWpJdDUGDoQjfL7n7usJyCUqWSq
/pa1I9Is3ZfeQ5f7Ztrdz35vVPj+0BlHXbZM5AZi8Dwf3vXFBlPty3fITpE65cty
cYnbpGto+wDoZj9fkKImjK21QsJdmHwaWRgmXX3WbdFBAbScTjDOc5Mls2VY8rSh
+xLI1KMB0FHSJqrGoFN3uE+G1vJX/hgn98KZKob23yJr2TWr9LHI56sMfN5xdd5A
iOHxYODSrIAi1k+bSlDz6WfEtufoqwBwHiog4nFOXrlHpGO6eUB1QjaQJZwKn2zE
3BjqJOoqbuBMg5XZRjihHcVVuZdU39/zQDwqliNpx3km4FzOiEoBABGzLP+Qt0Ch
cJFS1Yc8ffv616yP4A9qkyogk9YBBvNbDLB7WV8h8p1s4JP3f5aDUlxtAD+E+3aJ
8mrb3P7/0A2QyxlgX4qQOdj++b7GzXDxxLgOimJ4pLo0fdY8KWMeHvZPiMryHkMx
3GSZCHeleSVBCPB2pPCzUqkkKADbjBX3SYJsAMF9uXQAR4U7wojjvAmbt6vJEh6j
TEUG
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/root.key
================================================
-----BEGIN PRIVATE KEY-----
MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDizVfHBmu5pHC4
NfUiQzajF1J2ODsV5SnyWw1SLHg11fPwihBZHMJ8W5EEoUem1qG61I7oFt7Cb1eZ
TaEcbtqgKppmoRVD/cpEtnibNo0ELMM2A9OYOTP8NkrM5W7JW9uZM6wufnfLEqcU
/0wQMgR6aoaIzzQyYV8Kb+jS8uSPCRGmt4yxY9SGureKBNZFy1RgwkB4OwRppcHW
Tmi9YgKsTKUzOyWBI1+gF502YGm6vP7JB10o0xNv9ItjFrsHW5koNFIhfDA5k90E
PJR2VK1Q+h00b87Oms35tEKgVEH0QUbWbmg8ldAhqV4+PR3CFeX9uRXi5HEJJmYF
krd9u0swX5aSDa+hOufnVi+UqTNRCgRfpZa1RqZ5lltQHxNtbaWYA268YGa8FuPf
7jO2a0k9d8kGRyQJ5QLpvALgIquAHnzsSVjfntWq/8eZFEL2TF3Ok5L2zkK1yonc
ODVOYtqPJFCR/zAyqTR+GHtX5Ws9cHTfQwYAFeHn4HYc6wXfTu8XTfS/vP5enlVE
sYGqYPArRF6jaHxyD0+QwNLlBzbM4lWyWQn5Bh2R5rVqNWkKwVtoLUk+ZFaKFOWV
uhSqyT2hJLeNwhny9bdAoyBlsq1Vu1UeveY0Za5Wz3qIM3lSzdInooZLyxqesBgA
AH80p3VaXcjRpW1xvp4441151/35vQIDAQABAoICABm7ytXeOKLbsZ51INc+YRio
MMcRIkMduWCyTBSizxDssbz9LVWvGbIagZ3Q3txjRf54164lyiitkXbng/xB57R8
oQA8DrmkNisNuSmDSwTKP2wFiyCefPOFBX+yGJvoPEZpwoOT/eugtix/uxWrVy68
n38uY3HD8pCwme41eRFxqfsMoH4QIbEXxnN2kQliRLSl1cLOj3WdRR0X0HKMiFkc
aTIi5+J7LQJxK3lb/yMdBpuwpjVXncD6MkaP8bCoB/yz0w3RlXcy+8TbSs0SVof1
mRK2DPUMQ4qtlVGzvbgFIBB8fn9BUFhBa1wMey/mZC4hrgYMfXbYUIMZXpB5i9I+
kLz4IuTYlKL46IWa+f1WritsC2F/Oog7zuejo2MNGmma+ITReCx2hxB1+H+yl3As
HmXDjp4wDrnTIR38MgIfZmrtSqqvm5zUYsjEBFSleasH/K7uDddwqgYQ6TwUaqVY
eiDsyWELZQY+0JozP9zeE9J2X0HbOvid+fwwns1TPXyTjnPsLdSOCFuBZoWcYfiu
XnFXCEjT3HDjx9ZmzAujm7is86QSkKDZHJB34DTd0eVs8EZyxNqsB748vfigc7ag
1F/quaKYihBY7BKG8dDyJ6m7hyG2j4jHy5zZgG4mEs84n4ETvUSWK1g+vpVgb3vB
MXcK6N8M/vAl+GT3LJOBAoIBAQD44nPNIYK3X1ZWj5zea4i/LucNHRwU7ViuXZJW
c4WxeT2uo/24zVcUZlvgaor5QTlsw2Ab38gc6OxBwRv0v7GRlxufi+xpG/wJDJs3
ZSAMa4P5l/C06sOIpOq9p0X0Y+amVliAFcQtYQBTBK/APD3HIhm03hW9U1pT2jKV
JnkKaA/eMZPj55wtKEHDuvUcYll7bF5xmp9+/ECSnobxFSE0sFbXWss8CkEVJBdr
OFOlWNUJcGtBJwQi3P/OeOqotfo0BCxZ4Rt51/GFLqWjZC81lfvcVbcC4Ba8LXkI
AlLYI1uPI0ohxIMFd27i6Q92Ih042LzTWfl1MwotBSBM8CNVAoIBAQDpSUW+kCao
HOTPTn7mv8jR8Vp/uosyIqG4guynm65udI55n+y3881v/BrPGG6tsFaLsOTCUdR8
mxiK0X7d6alSE94H8DREhMnRJjoVJsyvjF6mYleqdjDUFxzkwImu0TWsZz3NhIqv
8kgSEa58JPEinufoKHVYh0J3LLXHYQ3J3sFx3IcO32Afe7pLwuLjEh7j1GWM7auW
V0fpDMUjri/j7NF/4hiBnd7fs/i2nMp03+XxYxrqnInolhJkXxyVbsIwFLb0flbK
EWeGudwMYc3W1f/uV2+OjdNPDY2ve7GntPMRFu7SSvFFjTRdqUhXlBfNUDGWugeT
tng3onk7IUzJAoIBAQDd6PubkR995LGUqKQT5QmefXFhzey19BI4FhJeps4zuYh3
6JxXZC8ab1HIPPcA21kaUvGkqNlCfaP51PbaOPlYeMUWcqot5dfJMcZLlA0JRev8
Za8ngJMriPAMfdLv3wtOkHqEaePrGiwx2WHjI1Np9Eu7arEzh9hoH4suVYli7/oG
AWp9sIsd8GEC5fWag06Jr8xduqIvlTb2BAcJee+LjRdBGSFQvUveT7nZzfU23ofE
zMm049baRvaG4GVKXEdkjbwFv6LB9vrP5xGlJ7S4MKzKflqZY7ihvGHH9FptgMko
TSzSAudXvm/OPkOc7zni780dHYJBL2sJTSLJtuupAoIBAHhoS0k6Wdl3YFnnp/Qt
lNdXfWBjxiiQW2xClydDYVq9ajQ4aRPhEG32b1fowmd/lovvN4NcfRH7c0VjL9oW
GkC05GqwfinHZ+s9kckNB6SsDMZQB/OBoV42t8ER536FmPBtMSb8fCCoKq641ZhZ
8OPvpL7c8wRIe/PK7eAEpftFsA62xjbU8GYPlG46HqUY2zy4idmdamzki8crwizS
YQGBX/hjmEZ+V2SbHYoTjyOX1LUsc94YAc48dy27MaOnUS9D4dJ7ywvsw8Rz9bGm
YXm7Zqd8FaY8aY5p7nFepKls6fAuKAH+kF1XrmmRUDdzxn1AIPgs+HAzRAVjJLNy
UpECggEAJLxoXdw6VbOCri1Q8wlA78ngcUEE09yL7qPVGtckCD1OdpJjkcskWoOO
CkMsVtFjJOmQL0Xj/MR4/Zyk7qB3bm3oUWev4sFzdpWswN8fzOA7K4hTVC5BeSoS
0uCiJ9/Up0Yte5Q0sHtO8U5xtnrSPYx5mHjPoh1ZbLem3OeGy1ifm/K8/0697bjX
1UI5OSG/bZUU+bO7oBoZPIXoyMUYvnPBPqfdVI6E+mz1zFILOh9Vl7017Gi9UT9z
hDb8K7IfTDTSgvqS+H7U0X9T8cfoSSWNxRo2DyaJ0aNt36qZzkJNhunvaif5W8f/
74xuCrejGJzwfA5Uel7mb6rqB/1law==
-----END PRIVATE KEY-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/root.srl
================================================
1C807FB6A8D925A28881E5B0BD746DD370B4C883


================================================
FILE: pingora-proxy/tests/utils/conf/keys/server.crt
================================================
-----BEGIN CERTIFICATE-----
MIIB9zCCAZ2gAwIBAgIUMI7aLvTxyRFCHhw57hGt4U6yupcwCgYIKoZIzj0EAwIw
ZDELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNp
c2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxFjAUBgNVBAMMDW9wZW5ydXN0
eS5vcmcwHhcNMjIwNDExMjExMzEzWhcNMzIwNDA4MjExMzEzWjBkMQswCQYDVQQG
EwJVUzELMAkGA1UECAwCQ0ExFjAUBgNVBAcMDVNhbiBGcmFuY2lzY28xGDAWBgNV
BAoMD0Nsb3VkZmxhcmUsIEluYzEWMBQGA1UEAwwNb3BlbnJ1c3R5Lm9yZzBZMBMG
ByqGSM49AgEGCCqGSM49AwEHA0IABNn/9RZtR48knaJD6tk9BdccaJfZ0hGEPn6B
SDXmlmJPhcTBqa4iUwW/ABpGvO3FpJcNWasrX2k+qZLq3g205MKjLTArMCkGA1Ud
EQQiMCCCDyoub3BlbnJ1c3R5Lm9yZ4INb3BlbnJ1c3R5Lm9yZzAKBggqhkjOPQQD
AgNIADBFAiAjISZ9aEKmobKGlT76idO740J6jPaX/hOrm41MLeg69AIhAJqKrSyz
wD/AAF5fR6tXmBqlnpQOmtxfdy13wDr4MT3h
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/server_boringssl_openssl.crt
================================================
-----BEGIN CERTIFICATE-----
MIIB9zCCAZ2gAwIBAgIUMI7aLvTxyRFCHhw57hGt4U6yupcwCgYIKoZIzj0EAwIw
ZDELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNp
c2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxFjAUBgNVBAMMDW9wZW5ydXN0
eS5vcmcwHhcNMjIwNDExMjExMzEzWhcNMzIwNDA4MjExMzEzWjBkMQswCQYDVQQG
EwJVUzELMAkGA1UECAwCQ0ExFjAUBgNVBAcMDVNhbiBGcmFuY2lzY28xGDAWBgNV
BAoMD0Nsb3VkZmxhcmUsIEluYzEWMBQGA1UEAwwNb3BlbnJ1c3R5Lm9yZzBZMBMG
ByqGSM49AgEGCCqGSM49AwEHA0IABNn/9RZtR48knaJD6tk9BdccaJfZ0hGEPn6B
SDXmlmJPhcTBqa4iUwW/ABpGvO3FpJcNWasrX2k+qZLq3g205MKjLTArMCkGA1Ud
EQQiMCCCDyoub3BlbnJ1c3R5Lm9yZ4INb3BlbnJ1c3R5Lm9yZzAKBggqhkjOPQQD
AgNIADBFAiAjISZ9aEKmobKGlT76idO740J6jPaX/hOrm41MLeg69AIhAJqKrSyz
wD/AAF5fR6tXmBqlnpQOmtxfdy13wDr4MT3h
-----END CERTIFICATE-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/server_boringssl_openssl.csr
================================================
-----BEGIN CERTIFICATE REQUEST-----
MIIBJzCBzgIBADBsMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEW
MBQGA1UEBwwNU2FuIEZyYW5jaXNjbzEYMBYGA1UECgwPQ2xvdWRmbGFyZSwgSW5j
MRYwFAYDVQQDDA1vcGVucnVzdHkub3JnMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcD
QgAE2f/1Fm1HjySdokPq2T0F1xxol9nSEYQ+foFINeaWYk+FxMGpriJTBb8AGka8
7cWklw1ZqytfaT6pkureDbTkwqAAMAoGCCqGSM49BAMCA0gAMEUCIFyDN8eamnoY
XydKn2oI7qImigxahyCftzjxkIEV5IKbAiEAo5l72X4U+YTVYmyPPnJIj2v5nA1R
RuUfMh5sXzwlwuM=
-----END CERTIFICATE REQUEST-----


================================================
FILE: pingora-proxy/tests/utils/conf/keys/server_rustls.crt
================================================
-----BEGIN CERTIFICATE-----
MIICJzCCAc6gAwIBAgIUU+G0acG/uiMu1ZDSjlcoY4gH53QwCgYIKoZIzj0EAwIw
ZDELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNp
c2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxFjAUBgNVBAMMDW9wZW5ydXN0
eS5vcmcwHhcNMjQwNzI0MTMzOTQ4WhcNMzQwNzIyMTMzOTQ4WjBkMQswCQYDVQQG
EwJVUzELMAkGA1UECAwCQ0ExFjAUBgNVBAcMDVNhbiBGcmFuY2lzY28xGDAWBgNV
BAoMD0Nsb3VkZmxhcmUsIEluYzEWMBQGA1UEAwwNb3BlbnJ1c3R5Lm9yZzBZMBMG
ByqGSM49AgEGCCqGSM49AwEHA0IABNn/9RZtR48knaJD6tk9BdccaJfZ0hGEPn6B
SDXmlmJPhcTBqa4iUwW/ABpGvO3FpJcNWasrX2k+qZLq3g205MKjXjBcMDsGA1Ud
EQQ0MDKCDyoub3BlbnJ1c3R5Lm9yZ4INb3BlbnJ1c3R5Lm9yZ4IHY2F0LmNvbYIH
ZG9nLmNvbTAdBgNVHQ4EFgQUnfYAFWyQnSN57IGokj7jcz8ChJQwCgYIKoZIzj0E
AwIDRwAwRAIgQr+Ly2cH04CncbnbhUf4hBl5frTp1pXgGnn8dYjd+UcCICuunEtp
H/a42/sVGBFvjS6FOFe6ZDs4oWBNEqQSw0S2
-----END CERTIFICATE-----

================================================
FILE: pingora-proxy/tests/utils/conf/keys/server_s2n.crt
================================================
-----BEGIN CERTIFICATE-----
MIICJzCCAc6gAwIBAgIUU+G0acG/uiMu1ZDSjlcoY4gH53QwCgYIKoZIzj0EAwIw
ZDELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRYwFAYDVQQHDA1TYW4gRnJhbmNp
c2NvMRgwFgYDVQQKDA9DbG91ZGZsYXJlLCBJbmMxFjAUBgNVBAMMDW9wZW5ydXN0
eS5vcmcwHhcNMjQwNzI0MTMzOTQ4WhcNMzQwNzIyMTMzOTQ4WjBkMQswCQYDVQQG
EwJVUzELMAkGA1UECAwCQ0ExFjAUBgNVBAcMDVNhbiBGcmFuY2lzY28xGDAWBgNV
BAoMD0Nsb3VkZmxhcmUsIEluYzEWMBQGA1UEAwwNb3BlbnJ1c3R5Lm9yZzBZMBMG
ByqGSM49AgEGCCqGSM49AwEHA0IABNn/9RZtR48knaJD6tk9BdccaJfZ0hGEPn6B
SDXmlmJPhcTBqa4iUwW/ABpGvO3FpJcNWasrX2k+qZLq3g205MKjXjBcMDsGA1Ud
EQQ0MDKCDyoub3BlbnJ1c3R5Lm9yZ4INb3BlbnJ1c3R5Lm9yZ4IHY2F0LmNvbYIH
ZG9nLmNvbTAdBgNVHQ4EFgQUnfYAFWyQnSN57IGokj7jcz8ChJQwCgYIKoZIzj0E
AwIDRwAwRAIgQr+Ly2cH04CncbnbhUf4hBl5frTp1pXgGnn8dYjd+UcCICuunEtp
H/a42/sVGBFvjS6FOFe6ZDs4oWBNEqQSw0S2
-----END CERTIFICATE-----

================================================
FILE: pingora-proxy/tests/utils/conf/keys/v3.ext
================================================
authorityKeyIdentifier=keyid,issuer
basicConstraints=CA:FALSE
keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment

================================================
FILE: pingora-proxy/tests/utils/conf/origin/.gitignore
================================================
**
!html
!html/**
!conf
!conf/**
!.gitignore


================================================
FILE: pingora-proxy/tests/utils/conf/origin/conf/nginx.conf
================================================

#user  nobody;
worker_processes  1;

error_log  /dev/stdout;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

pid        /tmp/pingora_mock_origin.pid;
master_process off;
daemon off;

events {
    worker_connections  4096;
}


http {
    #include       mime.types;
    #default_type  application/octet-stream;

    #log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
    #                  '$status $body_bytes_sent "$http_referer" '
    #                  '"$http_user_agent" "$http_x_forwarded_for"';

    access_log  off;

    sendfile        on;
    #tcp_nopush     on;

    keepalive_timeout  60;
    keepalive_requests 99999;

    lua_shared_dict hit_counter 10m;

    #gzip  on;

    # mTLS endpoint
    server {
        listen       8444 ssl http2;
        ssl_certificate keys/server.crt;
        ssl_certificate_key keys/key.pem;
        ssl_protocols TLSv1.2;
        ssl_ciphers TLS-AES-128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256;
        ssl_client_certificate keys/root.crt;
        ssl_verify_client on;
        ssl_verify_depth 4;

        location / {
            return 200 "hello world";
        }
    }

    # secp384r1 endpoint (ECDH and ECDSA)
    server {
        listen 8445 ssl http2;
        ssl_protocols TLSv1.2;
        ssl_ciphers TLS-AES-128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA512;
        ssl_certificate keys/curve_test.384.crt;
        ssl_certificate_key keys/curve_test.384.key.pem;
        ssl_ecdh_curve secp384r1;

        location /384 {
            return 200 "Happy Friday!";
        }
    }

    # secp521r1 endpoint (ECDH and ECDSA)
    server {
        listen 8446 ssl http2;
        ssl_protocols TLSv1.2;
        ssl_ciphers TLS-AES-128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA512;
        ssl_certificate keys/curve_test.521.crt;
        ssl_certificate_key keys/curve_test.521.key.pem;
        ssl_ecdh_curve secp521r1;

        location /521 {
            return 200 "Happy Monday!";
        }
    }

    server {
        listen       8000 http2;
        # 8001 is used for bad_lb test only to avoid unexpected connection reuse
        listen       8001;
        listen       [::]:8000;
        #listen       8443 ssl;
        listen       unix:/tmp/pingora_nginx_test.sock;
        listen       8443 ssl http2;
        server_name  localhost;

        ssl_certificate keys/server.crt;
        ssl_certificate_key keys/key.pem;
        ssl_protocols TLSv1.2;
        ssl_ciphers TLS-AES-128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES128-GCM-SHA256;

        # for benchmark
        http2_max_requests 999999;

        # increase max body size for /upload/ test
        client_max_body_size 128m;
        #charset koi8-r;

        #access_log  logs/host.access.log  main;

        add_header Origin-Http2 $http2;

        location / {
            root   ./html;
            index  index.html index.htm;
        }

        # this allows an arbitrary prefix to be included in URLs, so
        # that tests can control caching.
        location ~ ^/unique/[^/]+(/.*)$ {
            rewrite ^/unique/[^/]+(/.*)$ $1 last;
        }

        # this serves as an origin hit counter for an arbitrary prefix, which
        # then redirects to the rest of the URL like our unique/... endpoint.
        location ~ ^/hitcounted/[^/]+(/.*)$ {
            rewrite_by_lua_block {
                -- Extract specified ID
                local _, _, id = string.find(ngx.var.request_uri, "[^/]+/([^/]+)")

                -- Incr hit counter
                local hits = ngx.shared.hit_counter
                if not hits:get(id) then
                    hits:safe_set(id, 0, nil)
                end
                local value = hits:incr(id, 1)

                -- Rewrite URI to the requested destination
                local destStartIndex = string.find(ngx.var.request_uri, id) + string.len(id)
                local dest = string.sub(ngx.var.request_uri, destStartIndex)
                ngx.req.set_uri(dest, true)
            }
        }

        # this serves the hit count from the hitcounted endpoint
        location ~ ^/read_hit_count/[^/]+(/.*)$ {
            content_by_lua_block {
                -- Find the hit count for the given ID and return it.
                local _, _, id = string.find(ngx.var.request_uri, "[^/]+/([^/]+)")
                local hits = ngx.shared.hit_counter
                ngx.print(hits:get(id) or 0)
            }
        }

        location /test {
            return 200;
        }
        location /test2 {
            return 200 "hello world";
        }
        location /test3 {
            #return 200;
            content_by_lua_block {
                ngx.print("hello world")
            }
        }

        location /test4 {
            rewrite_by_lua_block {
                ngx.exit(200)
            }
            #return 201;

        }

        location /now {
            header_filter_by_lua_block {
                ngx.header["x-epoch"] = ngx.now()
            }
            return 200 "hello world";
        }

        location /brotli {
            header_filter_by_lua_block {
                local ae = ngx.req.get_headers()["Accept-Encoding"]
                if ae and ae:find("br") then
                    ngx.header["Content-Encoding"] = "br"
                else
                    return ngx.exit(400)
                end
            }
            content_by_lua_block {
                -- brotli compressed 'hello'.
                ngx.print("\x0f\x02\x80hello\x03")
            }
        }

        location /cache_control {
            header_filter_by_lua_block {
                local h = ngx.req.get_headers()
                if h["set-cache-control"] then
                    ngx.header["Cache-Control"] = h["set-cache-control"]
                end
                if h["set-cache-tag"] then
                    ngx.header["Cache-Tag"] = h["set-cache-tag"]
                end
                if h["set-revalidated"] then
                    return ngx.exit(304)
                end
            }
            return 200 "hello world";
        }

        location /revalidate_now {
            header_filter_by_lua_block {
                ngx.header["x-epoch"] = ngx.now()
                ngx.header["Last-Modified"] = "Tue, 03 May 2022 01:04:39 GMT"
                ngx.header["Etag"] = '"abcd"'
                local h = ngx.req.get_headers()
                if h["if-modified-since"] or h["if-none-match"] then
                    -- just assume they match
                    return ngx.exit(304)
                end
            }
            return 200 "hello world";
        }

        location /vary {
            header_filter_by_lua_block {
                ngx.header["Last-Modified"] = "Tue, 03 May 2022 01:04:39 GMT"
                ngx.header["Etag"] = '"abcd"'
                local h = ngx.req.get_headers()
                if h["set-vary"] then
                    ngx.header["Vary"] = h["set-vary"]
                end
                ngx.header["x-epoch"] = ngx.now()
                if not h["x-no-revalidate"] and (h["if-modified-since"] or h["if-none-match"]) then
                    -- just assume they match
                    return ngx.exit(304)
                end
            }
            return 200 "hello world";
        }

        location /no_if_headers {
            content_by_lua_block {
                local h = ngx.req.get_headers()
                if h["if-modified-since"] or h["if-none-match"] or h["range"] then
                    return ngx.exit(400)
                end
                ngx.say("no if headers detected")
            }
        }

        location /client_ip {
            add_header x-client-ip $remote_addr;
            return 200;
        }

        # 1. A origin load balancer that rejects reused connections.
        # This is to simulate the common problem when an upstream LB drops
        # a connection silently after being `keepalive`d for a while.
        # 2. A middlebox might drop the connection if the origin takes too long
        # to respond. We should not retry in this case.
        location /bad_lb {
            rewrite_by_lua_block {
                ngx.sleep(1)
                if tonumber(ngx.var.connection_requests) > 1 then
                    -- force drop the request and close the connection
                    ngx.exit(444)
                end
                ngx.req.read_body()
                local data = ngx.req.get_body_data()
                if data then
                    ngx.say(data)
                else
                    ngx.say("dog!")
                end
            }
        }

        location /duplex/ {
            client_max_body_size 1G;
            content_by_lua_block {
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
                -- without ngx.req.read_body(), the body will return without waiting for req body
            }
        }

        location /upload/ {
            client_max_body_size 1G;
            content_by_lua_block {
                ngx.req.read_body()
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
                ngx.print(string.rep("A", 64))
            }
        }

        location /upload_connection_die/ {
            content_by_lua_block {
                ngx.status = ngx.HTTP_OK
                ngx.print("")
                ngx.flush(true)

                time.sleep(1)
                ngx.exit(444)
            }
        }

        location /download/ {
            content_by_lua_block {
                ngx.req.read_body()
                local body = string.rep("A", 4194304)
                ngx.header["Content-Length"] = #body
                ngx.print(body)
            }
        }

        location /download_large/ {
            content_by_lua_block {
                ngx.req.read_body()
                local chunk = string.rep("A", 1048576) -- 1MB chunk
                local total_size = 128 * 1048576 -- 128MB total
                ngx.header["Content-Length"] = total_size
                for i = 1, 128 do
                    ngx.print(chunk)
                    ngx.flush()
                end
            }
        }

        location /tls_verify {
            keepalive_timeout 0;
            return 200;
        }

        location /noreuse {
            keepalive_timeout 0;
            return 200 "hello world";
        }

        location /set_cookie {
            add_header Set-Cookie "chocolate chip";
            return 200 "hello world";
        }

        location /chunked {
            content_by_lua_block {
                ngx.req.read_body()
                ngx.print(string.rep("A", 64))
            }
        }

        location /echo {
            content_by_lua_block {
                ngx.req.read_body()
                local data = ngx.req.get_body_data()
                if data then
                    ngx.print(data)
                end
            }
        }

        location /low_ttl {
            add_header Cache-Control "public, max-age=0";
            return 200 "low ttl";
        }

        location /connection_die {
            content_by_lua_block {
                ngx.print(string.rep("A", 5))
                ngx.flush()
                ngx.exit(444) -- 444 kills the connection right away
            }
        }

        location /103 {
            content_by_lua_block {
                local sock, err = ngx.req.socket(true)
                if not sock then
                    ngx.log(ngx.ERR, "Failed socket:", err)
                    return
                end

                local ok, err = sock:send("HTTP/1.1 103 Early Hints\r\nLink: </style.css>; rel=preload\r\n\r\n")
                if not ok then
                    ngx.log(ngx.ERR, "Failed 103:", err)
                end

                ngx.sleep(1)

                local ok, err = sock:send("HTTP/1.1 200 OK\r\nContent-Length: 6\r\n\r\n123456\r\n\r\n")
                if not ok then
                    ngx.log(ngx.ERR, "Failed 200:", err)
                end
            }
        }

        location /103-die {
            content_by_lua_block {
                local sock, err = ngx.req.socket(true)
                if not sock then
                    ngx.log(ngx.ERR, "Failed socket:", err)
                    return
                end

                local ok, err = sock:send("HTTP/1.1 103 Early Hints\r\nLink: </style.css>; rel=preload\r\n\r\n")
                if not ok then
                    ngx.log(ngx.ERR, "Failed 103:", err)
                end

                ngx.sleep(1)

                ngx.exit(444) -- 444 kills the connection right away
            }
        }

        location /no_compression {
            gzip off; # avoid accidental turn it on at server block
            content_by_lua_block {
                ngx.print(string.rep("B", 32))
            }
        }

        location /file_maker {
            gzip off; # fixed content size
            content_by_lua_block {
                local size = tonumber(ngx.var.http_x_set_size) or 1024
                ngx.print(string.rep("A", size))
            }
        }

        location /gzip {
            alias ./html;
            gzip on;
            gzip_min_length   0;
            gzip_types        *;
            add_header received-accept-encoding $http_accept_encoding;
        }

        location /sleep {
            rewrite_by_lua_block {
                local sleep_sec = tonumber(ngx.var.http_x_set_sleep) or 1
                ngx.sleep(sleep_sec)
                if ngx.var.http_x_abort then
                    -- force drop the request and close the connection
                    ngx.exit(444)
                end
            }
            content_by_lua_block {
                if ngx.var.http_x_error_header then
                    ngx.status = 500
                    ngx.exit(0)
                    return
                end
                ngx.print("hello ")
                ngx.flush()
                local sleep_sec = tonumber(ngx.var.http_x_set_body_sleep) or 0
                ngx.sleep(sleep_sec)
                if ngx.var.http_x_abort_body then
                    ngx.flush()
                    -- force drop the request and close the connection
                    ngx.exit(444)
                    return
                end
                ngx.print("world")
            }
            header_filter_by_lua_block {
                if ngx.var.http_x_no_store then
                    ngx.header["Cache-control"] = "no-store"
                end
                if ngx.var.http_x_no_stale_revalidate then
                    ngx.header["Cache-control"] = "stale-while-revalidate=0"
                end
                if ngx.var.http_x_set_content_length then
                    ngx.header["Content-Length"] = "11" -- based on "hello world"
                end
            }
        }

        location /set_content_length {
            header_filter_by_lua_block {
                if ngx.var.http_x_set_content_length then
                    ngx.header["Content-Length"] = ngx.var.http_x_set_content_length
                end
            }
            return 200 "hello world";
        }

        location /slow_body {
            content_by_lua_block {
                local sleep_sec = tonumber(ngx.var.http_x_set_sleep) or 1
                local hello_to = ngx.var.http_x_set_hello or "world"
                ngx.flush()
                ngx.sleep(sleep_sec)
                ngx.print("hello ")
                ngx.flush()
                ngx.sleep(sleep_sec)
                ngx.print(hello_to)
                ngx.sleep(sleep_sec)
                ngx.print("!")
            }
            header_filter_by_lua_block {
                if ngx.var.http_x_no_store then
                    ngx.header["Cache-control"] = "no-store"
                end
            }
        }

        location /content_type {
            header_filter_by_lua_block {
                ngx.header["Content-Type"] = ngx.var.http_set_content_type
            }
            return 200 "hello world";
        }

        location /upgrade {
            content_by_lua_block {
                ngx.status = 101
                ngx.header['Upgrade'] = 'websocket'
                ngx.header['Connection'] = 'Upgrade'
                ngx.say('hello')
            }
        }

        location /upgrade_echo_body {
            rewrite_by_lua_block {
                ngx.req.read_body()
                local data = ngx.req.get_body_data()
                ngx.status = 101
                ngx.header['Upgrade'] = 'websocket'
                ngx.header['Connection'] = 'Upgrade'

                if data then
                    ngx.print(data)
                end
            }
        }

        #error_page  404              /404.html;

        # redirect server error pages to the static page /50x.html
        #
        error_page   500 502 503 504  /50x.html;
        location = /50x.html {
            root   html;
        }
    }
}


================================================
FILE: pingora-proxy/tests/utils/conf/origin/html/index.html
================================================
Hello World!


================================================
FILE: pingora-proxy/tests/utils/mock_origin.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use once_cell::sync::Lazy;
use std::path::Path;
use std::process;
use std::{thread, time};

pub static MOCK_ORIGIN: Lazy<bool> = Lazy::new(init);

fn init() -> bool {
    #[cfg(feature = "rustls")]
    let src_cert_path = format!(
        "{}/tests/utils/conf/keys/server_rustls.crt",
        env!("CARGO_MANIFEST_DIR")
    );
    #[cfg(feature = "openssl_derived")]
    let src_cert_path = format!(
        "{}/tests/utils/conf/keys/server_boringssl_openssl.crt",
        env!("CARGO_MANIFEST_DIR")
    );
    #[cfg(feature = "s2n")]
    let src_cert_path = format!(
        "{}/tests/utils/conf/keys/server_s2n.crt",
        env!("CARGO_MANIFEST_DIR")
    );

    #[cfg(feature = "any_tls")]
    {
        let mut dst_cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
        std::fs::copy(Path::new(&src_cert_path), Path::new(&dst_cert_path));
        dst_cert_path = format!(
            "{}/tests/utils/conf/keys/server.crt",
            env!("CARGO_MANIFEST_DIR")
        );
        std::fs::copy(Path::new(&src_cert_path), Path::new(&dst_cert_path));
    }

    // TODO: figure out a way to kill openresty when exiting
    process::Command::new("pkill")
        .args(["-F", "/tmp/pingora_mock_origin.pid"])
        .spawn()
        .unwrap()
        .wait();
    let _origin = thread::spawn(|| {
        process::Command::new("openresty")
            .args(["-p", &format!("{}/origin", super::conf_dir())])
            .output()
            .unwrap();
    });
    // wait until the server is up
    thread::sleep(time::Duration::from_secs(2));
    true
}


================================================
FILE: pingora-proxy/tests/utils/mod.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![allow(unused)]

#[cfg(feature = "any_tls")]
pub mod cert;

pub mod mock_origin;
pub mod server_utils;
pub mod websocket;

use once_cell::sync::Lazy;
use tokio::runtime::{Builder, Runtime};

// for tests with a static connection pool, if we use tokio::test the reactor
// will no longer be associated with the backing pool fds since it's dropped per test
pub static GLOBAL_RUNTIME: Lazy<Runtime> =
    Lazy::new(|| Builder::new_multi_thread().enable_all().build().unwrap());

pub fn conf_dir() -> String {
    format!("{}/tests/utils/conf", env!("CARGO_MANIFEST_DIR"))
}


================================================
FILE: pingora-proxy/tests/utils/server_utils.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[cfg(feature = "any_tls")]
use super::cert;
use async_trait::async_trait;
use clap::Parser;
use http::header::{ACCEPT_ENCODING, CONTENT_LENGTH, TRANSFER_ENCODING, VARY};
use http::HeaderValue;
use log::error;
use once_cell::sync::Lazy;
use pingora_cache::cache_control::CacheControl;
use pingora_cache::hashtable::ConcurrentHashTable;
use pingora_cache::key::HashBinary;
use pingora_cache::lock::CacheKeyLockImpl;
use pingora_cache::{
    eviction::simple_lru::Manager, filters::resp_cacheable, lock::CacheLock, predictor::Predictor,
    set_compression_dict_path, CacheKey, CacheMeta, CacheMetaDefaults, CachePhase, MemCache,
    NoCacheReason, RespCacheable,
};
use pingora_cache::{
    CacheOptionOverrides, ForcedFreshness, HitHandler, PurgeType, VarianceBuilder,
};
use pingora_core::apps::{HttpServerApp, HttpServerOptions};
use pingora_core::modules::http::compression::ResponseCompression;
use pingora_core::protocols::{
    http::error_resp::gen_error_response, l4::socket::SocketAddr, Digest,
};
use pingora_core::server::configuration::Opt;
use pingora_core::services::{Service, ServiceWithDependents};
use pingora_core::upstreams::peer::HttpPeer;
use pingora_core::utils::tls::CertKey;
use pingora_error::{Error, ErrorSource, ErrorType::*, Result};
use pingora_http::{RequestHeader, ResponseHeader};
use pingora_proxy::{FailToProxy, ProxyHttp, Session};
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::thread;
use std::time::{Duration, SystemTime};

pub struct ExampleProxyHttps {}

pub const TEST_PSK_IDENTITY: &str = "test-psk-identity";
pub const TEST_PSK_SECRET: &str = "i2Wx8jrYVi5Vt7HSL/fsk003+PnmfcFuwWMsUyQvcZ4=";

#[allow(clippy::upper_case_acronyms)]
#[derive(Default)]
pub struct CTX {
    conn_reused: bool,
    upstream_client_addr: Option<SocketAddr>,
    upstream_server_addr: Option<SocketAddr>,
}

// Common logic for both ProxyHttp(s) types
fn connected_to_upstream_common(
    reused: bool,
    digest: Option<&Digest>,
    ctx: &mut CTX,
) -> Result<()> {
    ctx.conn_reused = reused;
    let socket_digest = digest
        .expect("upstream connector digest should be set for HTTP sessions")
        .socket_digest
        .as_ref()
        .expect("socket digest should be set for HTTP sessions");
    ctx.upstream_client_addr = socket_digest.local_addr().cloned();
    ctx.upstream_server_addr = socket_digest.peer_addr().cloned();

    Ok(())
}

fn response_filter_common(
    session: &mut Session,
    response: &mut ResponseHeader,
    ctx: &mut CTX,
) -> Result<()> {
    if ctx.conn_reused {
        response.insert_header("x-conn-reuse", "1")?;
    }

    let client_addr = session.client_addr();
    let server_addr = session.server_addr();
    response.insert_header(
        "x-client-addr",
        client_addr.map_or_else(|| "unset".into(), |a| a.to_string()),
    )?;
    response.insert_header(
        "x-server-addr",
        server_addr.map_or_else(|| "unset".into(), |a| a.to_string()),
    )?;

    response.insert_header(
        "x-upstream-client-addr",
        ctx.upstream_client_addr
            .as_ref()
            .map_or_else(|| "unset".into(), |a| a.to_string()),
    )?;
    response.insert_header(
        "x-upstream-server-addr",
        ctx.upstream_server_addr
            .as_ref()
            .map_or_else(|| "unset".into(), |a| a.to_string()),
    )?;

    Ok(())
}

#[async_trait]
#[cfg(feature = "any_tls")]
impl ProxyHttp for ExampleProxyHttps {
    type CTX = CTX;
    fn new_ctx(&self) -> Self::CTX {
        CTX::default()
    }

    async fn upstream_peer(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let session = session.as_downstream();
        let req = session.req_header();

        let port = req
            .headers
            .get("x-port")
            .map_or("8443", |v| v.to_str().unwrap());
        let sni = req.headers.get("sni").map_or("", |v| v.to_str().unwrap());
        let alt = req
            .headers
            .get("alt")
            .map(|v| v.to_str().unwrap().to_string());

        let client_cert = session.get_header_bytes("client_cert");

        let mut peer = Box::new(HttpPeer::new(
            format!("127.0.0.1:{port}"),
            true,
            sni.to_string(),
        ));
        peer.options.alternative_cn = alt;

        let verify = session.get_header_bytes("verify") == b"1";
        peer.options.verify_cert = verify;

        let verify_host = session.get_header_bytes("verify_host") == b"1";
        peer.options.verify_hostname = verify_host;

        if matches!(client_cert, b"1" | b"2") {
            let (mut certs, key) = if client_cert == b"1" {
                (vec![cert::LEAF_CERT.clone()], cert::LEAF_KEY.clone())
            } else {
                (vec![cert::LEAF2_CERT.clone()], cert::LEAF2_KEY.clone())
            };
            if session.get_header_bytes("client_intermediate") == b"1" {
                certs.push(cert::INTERMEDIATE_CERT.clone());
            }
            #[cfg(feature = "s2n")]
            {
                let combined_pem = certs.into_iter().flatten().collect();
                peer.client_cert_key = Some(Arc::new(CertKey::new(combined_pem, key)));
            }
            #[cfg(not(feature = "s2n"))]
            {
                peer.client_cert_key = Some(Arc::new(CertKey::new(certs, key)));
            }
        }

        #[cfg(feature = "s2n")]
        if let Some(psk_identity) = req.headers.get("psk_identity") {
            use pingora_core::{
                protocols::tls::{Psk, PskConfig},
                tls::PskHmac,
            };

            let psk = Psk::new(
                psk_identity.to_str().unwrap().to_string(),
                TEST_PSK_SECRET.as_bytes().to_vec(),
                PskHmac::SHA256,
            );
            peer.options.psk = Some(Arc::new(PskConfig::new(vec![psk])));
        }

        if session.get_header_bytes("x-h2") == b"true" {
            // default is 1, 1
            peer.options.set_http_version(2, 2);
        }

        Ok(peer)
    }

    async fn response_filter(
        &self,
        session: &mut Session,
        upstream_response: &mut ResponseHeader,
        ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        response_filter_common(session, upstream_response, ctx)
    }

    async fn upstream_request_filter(
        &self,
        session: &mut Session,
        req: &mut RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        let host = session.get_header_bytes("host-override");
        if host != b"" {
            req.insert_header("host", host)?;
        }
        Ok(())
    }

    async fn connected_to_upstream(
        &self,
        _http_session: &mut Session,
        reused: bool,
        _peer: &HttpPeer,
        #[cfg(unix)] _fd: std::os::unix::io::RawFd,
        #[cfg(windows)] _sock: std::os::windows::io::RawSocket,
        digest: Option<&Digest>,
        ctx: &mut CTX,
    ) -> Result<()> {
        connected_to_upstream_common(reused, digest, ctx)
    }
}

pub struct ExampleProxyHttp {}

#[async_trait]
impl ProxyHttp for ExampleProxyHttp {
    type CTX = CTX;
    fn new_ctx(&self) -> Self::CTX {
        CTX::default()
    }

    async fn early_request_filter(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        let req = session.req_header();
        let downstream_compression = req.headers.get("x-downstream-compression").is_some();
        if downstream_compression {
            session
                .downstream_modules_ctx
                .get_mut::<ResponseCompression>()
                .unwrap()
                .adjust_level(6);
        } else {
            // enable upstream compression for all requests by default
            session.upstream_compression.adjust_level(6);
        }
        Ok(())
    }

    async fn request_filter(&self, session: &mut Session, _ctx: &mut Self::CTX) -> Result<bool> {
        let req = session.req_header();

        let write_timeout = req
            .headers
            .get("x-write-timeout")
            .and_then(|v| v.to_str().ok().and_then(|v| v.parse().ok()));

        let min_rate = req
            .headers
            .get("x-min-rate")
            .and_then(|v| v.to_str().ok().and_then(|v| v.parse().ok()));

        let close_on_response_before_downstream_finish = req
            .headers
            .get("x-close-on-response-before-downstream-finish")
            .is_some();

        let downstream_compression = req.headers.get("x-downstream-compression").is_some();
        if !downstream_compression {
            // enable upstream compression for all requests by default
            session.upstream_compression.adjust_level(6);
            // also disable downstream compression in order to test the upstream one
            session
                .downstream_modules_ctx
                .get_mut::<ResponseCompression>()
                .unwrap()
                .adjust_level(0);
        }

        session.set_min_send_rate(min_rate);
        session.set_write_timeout(write_timeout.map(Duration::from_secs));
        session.set_close_on_response_before_downstream_finish(
            close_on_response_before_downstream_finish,
        );

        Ok(false)
    }

    async fn response_filter(
        &self,
        session: &mut Session,
        upstream_response: &mut ResponseHeader,
        ctx: &mut Self::CTX,
    ) -> Result<()> {
        response_filter_common(session, upstream_response, ctx)
    }

    async fn upstream_peer(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let req = session.req_header();
        #[cfg(unix)]
        if req.headers.contains_key("x-uds-peer") {
            return Ok(Box::new(HttpPeer::new_uds(
                "/tmp/pingora_nginx_test.sock",
                false,
                "".to_string(),
            )?));
        }
        let port = req
            .headers
            .get("x-port")
            .map_or("8000", |v| v.to_str().unwrap());

        let mut peer = Box::new(HttpPeer::new(
            format!("127.0.0.1:{port}"),
            false,
            "".to_string(),
        ));

        if session.get_header_bytes("x-h2") == b"true" {
            // default is 1, 1
            peer.options.set_http_version(2, 2);
        }

        Ok(peer)
    }

    async fn connected_to_upstream(
        &self,
        _http_session: &mut Session,
        reused: bool,
        _peer: &HttpPeer,
        #[cfg(unix)] _fd: std::os::unix::io::RawFd,
        #[cfg(windows)] _sock: std::os::windows::io::RawSocket,
        digest: Option<&Digest>,
        ctx: &mut CTX,
    ) -> Result<()> {
        connected_to_upstream_common(reused, digest, ctx)
    }
}

static CACHE_BACKEND: Lazy<MemCache> = Lazy::new(MemCache::new);
const CACHE_DEFAULT: CacheMetaDefaults =
    CacheMetaDefaults::new(|_| Some(Duration::from_secs(1)), 1, 1);
static CACHE_PREDICTOR: Lazy<Predictor<32>> = Lazy::new(|| Predictor::new(5, None));
static EVICTION_MANAGER: Lazy<Manager> = Lazy::new(|| Manager::new(8192)); // 8192 bytes
static CACHE_LOCK: Lazy<Box<CacheKeyLockImpl>> =
    Lazy::new(|| CacheLock::new_boxed(std::time::Duration::from_secs(2)));
// Example of how one might restrict which fields can be varied on.
static CACHE_VARY_ALLOWED_HEADERS: Lazy<Option<HashSet<&str>>> =
    Lazy::new(|| Some(vec!["accept", "accept-encoding"].into_iter().collect()));

// #[allow(clippy::upper_case_acronyms)]
pub struct CacheCTX {
    upstream_status: Option<u16>,
}

pub struct ExampleProxyCache {}

#[async_trait]
impl ProxyHttp for ExampleProxyCache {
    type CTX = CacheCTX;
    fn new_ctx(&self) -> Self::CTX {
        CacheCTX {
            upstream_status: None,
        }
    }

    async fn early_request_filter(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        if session
            .req_header()
            .headers
            .get("x-downstream-compression")
            .is_some()
        {
            session
                .downstream_modules_ctx
                .get_mut::<ResponseCompression>()
                .unwrap()
                .adjust_level(6);
        }
        if session
            .req_header()
            .headers
            .get("x-downstream-decompression")
            .is_some()
        {
            session
                .downstream_modules_ctx
                .get_mut::<ResponseCompression>()
                .unwrap()
                .adjust_decompression(true);
        }
        Ok(())
    }

    async fn upstream_peer(
        &self,
        session: &mut Session,
        _ctx: &mut Self::CTX,
    ) -> Result<Box<HttpPeer>> {
        let req = session.req_header();
        let port = req
            .headers
            .get("x-port")
            .map_or("8000", |v| v.to_str().unwrap());

        let mut peer = Box::new(HttpPeer::new(
            format!("127.0.0.1:{}", port),
            false,
            "".to_string(),
        ));

        if session.get_header_bytes("x-h2") == b"true" {
            // default is 1, 1
            peer.options.set_http_version(2, 2);
        }

        Ok(peer)
    }

    fn request_cache_filter(&self, session: &mut Session, _ctx: &mut Self::CTX) -> Result<()> {
        // TODO: only allow GET & HEAD

        if session.get_header_bytes("x-bypass-cache") != b"" {
            return Ok(());
        }

        // turn on eviction only for some requests to avoid interference across tests
        let eviction = session.req_header().headers.get("x-eviction").map(|_| {
            &*EVICTION_MANAGER as &'static (dyn pingora_cache::eviction::EvictionManager + Sync)
        });
        let lock = session
            .req_header()
            .headers
            .get("x-lock")
            .map(|_| CACHE_LOCK.as_ref());
        let mut overrides = CacheOptionOverrides::default();
        overrides.wait_timeout = Some(Duration::from_secs(2));
        session.cache.enable(
            &*CACHE_BACKEND,
            eviction,
            Some(&*CACHE_PREDICTOR),
            lock,
            Some(overrides),
        );

        if let Some(max_file_size_hdr) = session
            .req_header()
            .headers
            .get("x-cache-max-file-size-bytes")
        {
            let bytes = max_file_size_hdr
                .to_str()
                .unwrap()
                .parse::<usize>()
                .unwrap();
            session.cache.set_max_file_size_bytes(bytes);
        }

        Ok(())
    }

    /// Reference `cache_key_callback` implementation for integration tests.
    ///
    /// Builds the primary key as `{host}{path_and_query}` from the request.
    /// This is **not production ready**: it does not account for `Vary`, custom
    /// request filters, or scheme differences. See the rustdoc on
    /// [`ProxyHttp::cache_key_callback`] for details.
    fn cache_key_callback(&self, session: &Session, _ctx: &mut Self::CTX) -> Result<CacheKey> {
        let req_header = session.req_header();

        let host = req_header
            .headers
            .get(http::header::HOST)
            .and_then(|v| v.to_str().ok())
            .or_else(|| req_header.uri.authority().map(|a| a.as_str()))
            .unwrap_or("");

        let path_and_query = req_header
            .uri
            .path_and_query()
            .map(|pq| pq.as_str())
            .unwrap_or("/");

        Ok(CacheKey::new(
            String::new(),
            format!("{host}{path_and_query}"),
            String::new(),
        ))
    }

    async fn cache_hit_filter(
        &self,
        session: &mut Session,
        _meta: &CacheMeta,
        _hit_handler: &mut HitHandler,
        is_fresh: bool,
        _ctx: &mut Self::CTX,
    ) -> Result<Option<ForcedFreshness>> {
        // allow test header to control force expiry/miss
        if session.get_header_bytes("x-force-miss") != b"" {
            return Ok(Some(ForcedFreshness::ForceMiss));
        }

        if !is_fresh {
            if session.get_header_bytes("x-force-fresh") != b"" {
                return Ok(Some(ForcedFreshness::ForceFresh));
            }
            // already expired
            return Ok(None);
        }

        if session.get_header_bytes("x-force-expire") != b"" {
            return Ok(Some(ForcedFreshness::ForceExpired));
        }
        Ok(None)
    }

    fn cache_vary_filter(
        &self,
        meta: &CacheMeta,
        _ctx: &mut Self::CTX,
        req: &RequestHeader,
    ) -> Option<HashBinary> {
        let mut key = VarianceBuilder::new();

        // Vary per header from origin. Target headers are de-duplicated by key logic.
        let vary_headers_lowercased: Vec<String> = meta
            .headers()
            .get_all(VARY)
            .iter()
            // Filter out any unparseable vary headers.
            .flat_map(|vary_header| vary_header.to_str().ok())
            .flat_map(|vary_header| vary_header.split(','))
            .map(|s| s.trim().to_lowercase())
            .filter(|header_name| {
                // Filter only for allowed headers, if restricted.
                CACHE_VARY_ALLOWED_HEADERS
                    .as_ref()
                    .map(|al| al.contains(header_name.as_str()))
                    .unwrap_or(true)
            })
            .collect();

        vary_headers_lowercased.iter().for_each(|header_name| {
            // Add this header and value to be considered in the variance key.
            key.add_value(
                header_name,
                req.headers
                    .get(header_name)
                    .map(|v| v.as_bytes())
                    .unwrap_or(&[]),
            );
        });

        key.finalize()
    }

    async fn upstream_request_filter(
        &self,
        session: &mut Session,
        upstream_request: &mut RequestHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<()> {
        if let Some(up_accept_encoding) = session
            .req_header()
            .headers
            .get("x-upstream-accept-encoding")
        {
            upstream_request.insert_header(&ACCEPT_ENCODING, up_accept_encoding)?;
        }
        Ok(())
    }

    fn response_cache_filter(
        &self,
        session: &Session,
        resp: &ResponseHeader,
        _ctx: &mut Self::CTX,
    ) -> Result<RespCacheable> {
        // Allow testing the unlikely case of caching a 101 response
        if resp.status == 101
            && session
                .req_header()
                .headers
                .contains_key("x-cache-websocket")
        {
            return Ok(RespCacheable::Cacheable(CacheMeta::new(
                SystemTime::now() + Duration::from_secs(5),
                SystemTime::now(),
                0,
                0,
                resp.clone(),
            )));
        }

        let cc = CacheControl::from_resp_headers(resp);
        Ok(resp_cacheable(
            cc.as_ref(),
            resp.clone(),
            false,
            &CACHE_DEFAULT,
        ))
    }

    async fn upstream_response_filter(
        &self,
        session: &mut Session,
        upstream_response: &mut ResponseHeader,
        ctx: &mut Self::CTX,
    ) -> Result<()> {
        ctx.upstream_status = Some(upstream_response.status.into());
        if session
            .req_header()
            .headers
            .contains_key("x-upstream-fake-http10")
        {
            // TODO to simulate an actual http1.0 origin
            upstream_response.set_version(http::Version::HTTP_10);
            upstream_response.remove_header(&CONTENT_LENGTH);
            upstream_response.remove_header(&TRANSFER_ENCODING);
        }
        Ok(())
    }

    async fn response_filter(
        &self,
        session: &mut Session,
        upstream_response: &mut ResponseHeader,
        ctx: &mut Self::CTX,
    ) -> Result<()>
    where
        Self::CTX: Send + Sync,
    {
        if session.cache.enabled() {
            match session.cache.phase() {
                CachePhase::Hit => upstream_response.insert_header("x-cache-status", "hit")?,
                CachePhase::Miss => upstream_response.insert_header("x-cache-status", "miss")?,
                CachePhase::Stale => upstream_response.insert_header("x-cache-status", "stale")?,
                CachePhase::StaleUpdating => {
                    upstream_response.insert_header("x-cache-status", "stale-updating")?
                }
                CachePhase::Expired => {
                    upstream_response.insert_header("x-cache-status", "expired")?
                }
                CachePhase::Revalidated | CachePhase::RevalidatedNoCache(_) => {
                    upstream_response.insert_header("x-cache-status", "revalidated")?
                }
                _ => upstream_response.insert_header("x-cache-status", "invalid")?,
            }
        } else {
            match session.cache.phase() {
                CachePhase::Disabled(NoCacheReason::Deferred) => {
                    upstream_response.insert_header("x-cache-status", "deferred")?;
                }
                _ => upstream_response.insert_header("x-cache-status", "no-cache")?,
            }
        }
        if let Some(d) = session.cache.lock_duration() {
            upstream_response.insert_header("x-cache-lock-time-ms", format!("{}", d.as_millis()))?
        }
        if let Some(up_stat) = ctx.upstream_status {
            upstream_response.insert_header("x-upstream-status", up_stat.to_string())?;
        }
        Ok(())
    }

    async fn fail_to_proxy(
        &self,
        session: &mut Session,
        e: &Error,
        _ctx: &mut Self::CTX,
    ) -> FailToProxy
    where
        Self::CTX: Send + Sync,
    {
        // default OSS fail_to_proxy with added headers
        let code = match e.etype() {
            HTTPStatus(code) => *code,
            _ => {
                match e.esource() {
                    ErrorSource::Upstream => 502,
                    ErrorSource::Downstream => {
                        match e.etype() {
                            WriteError | ReadError | ConnectionClosed => {
                                /* conn already dead */
                                0
                            }
                            _ => 400,
                        }
                    }
                    ErrorSource::Internal | ErrorSource::Unset => 500,
                }
            }
        };
        if code > 0 {
            let mut resp = gen_error_response(code);
            // any relevant metadata headers to add
            if let Some(d) = session.cache.lock_duration() {
                resp.insert_header("x-cache-lock-time-ms", format!("{}", d.as_millis()))
                    .unwrap();
            }
            session
                .write_response_header(Box::new(resp), true)
                .await
                .unwrap_or_else(|e| {
                    error!("failed to send error response to downstream: {e}");
                });
        }

        FailToProxy {
            error_code: code,
            // default to no reuse, which is safest
            can_reuse_downstream: false,
        }
    }

    fn should_serve_stale(
        &self,
        _session: &mut Session,
        _ctx: &mut Self::CTX,
        error: Option<&Error>, // None when it is called during stale while revalidate
    ) -> bool {
        // enable serve stale while updating
        error.is_none_or(|e| e.esource() == &ErrorSource::Upstream)
    }

    fn is_purge(&self, session: &Session, _ctx: &Self::CTX) -> bool {
        session.req_header().method == "PURGE"
    }
}

fn test_main() {
    env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();

    let opts: Vec<String> = vec![
        "pingora-proxy".into(),
        "-c".into(),
        "tests/pingora_conf.yaml".into(),
    ];
    let mut my_server =
        pingora_core::server::Server::new(Some(Opt::parse_from_args(opts))).unwrap();
    my_server.bootstrap();

    let mut proxy_service_http =
        pingora_proxy::http_proxy_service(&my_server.configuration, ExampleProxyHttp {});
    proxy_service_http.add_tcp("0.0.0.0:6147");
    #[cfg(unix)]
    proxy_service_http.add_uds("/tmp/pingora_proxy.sock", None);

    let mut proxy_service_http_connect =
        pingora_proxy::http_proxy_service(&my_server.configuration, ExampleProxyHttp {});
    let http_logic = proxy_service_http_connect.app_logic_mut().unwrap();
    let mut http_server_options = HttpServerOptions::default();
    http_server_options.allow_connect_method_proxying = true;
    http_logic.server_options = Some(http_server_options);
    proxy_service_http_connect.add_tcp("0.0.0.0:6160");

    let mut proxy_service_h2c =
        pingora_proxy::http_proxy_service(&my_server.configuration, ExampleProxyHttp {});

    let http_logic = proxy_service_h2c.app_logic_mut().unwrap();
    let mut http_server_options = HttpServerOptions::default();
    http_server_options.h2c = true;
    http_logic.server_options = Some(http_server_options);
    proxy_service_h2c.add_tcp("0.0.0.0:6146");

    let mut proxy_service_https_opt: Option<Box<dyn ServiceWithDependents>> = None;

    #[cfg(feature = "any_tls")]
    {
        let mut proxy_service_https =
            pingora_proxy::http_proxy_service(&my_server.configuration, ExampleProxyHttps {});
        proxy_service_https.add_tcp("0.0.0.0:6149");
        let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
        let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));
        let mut tls_settings =
            pingora_core::listeners::tls::TlsSettings::intermediate(&cert_path, &key_path).unwrap();
        tls_settings.enable_h2();
        proxy_service_https.add_tls_with_settings("0.0.0.0:6150", None, tls_settings);
        proxy_service_https_opt = Some(Box::new(proxy_service_https))
    }

    let mut proxy_service_cache =
        pingora_proxy::http_proxy_service(&my_server.configuration, ExampleProxyCache {});
    proxy_service_cache.add_tcp("0.0.0.0:6148");

    #[cfg(feature = "any_tls")]
    {
        let cert_path = format!("{}/tests/keys/server.crt", env!("CARGO_MANIFEST_DIR"));
        let key_path = format!("{}/tests/keys/key.pem", env!("CARGO_MANIFEST_DIR"));

        let mut tls_settings =
            pingora_core::listeners::tls::TlsSettings::intermediate(&cert_path, &key_path).unwrap();
        tls_settings.enable_h2();
        proxy_service_cache.add_tls_with_settings("0.0.0.0:6153", None, tls_settings);
    }

    let mut services: Vec<Box<dyn ServiceWithDependents>> = vec![
        Box::new(proxy_service_h2c),
        Box::new(proxy_service_http),
        Box::new(proxy_service_http_connect),
        Box::new(proxy_service_cache),
    ];

    if let Some(proxy_service_https) = proxy_service_https_opt {
        services.push(proxy_service_https)
    }

    set_compression_dict_path("tests/headers.dict");
    my_server.add_services(services);
    my_server.run_forever();
}

pub struct Server {
    pub handle: thread::JoinHandle<()>,
}

impl Server {
    pub fn start() -> Self {
        let server_handle = thread::spawn(|| {
            test_main();
        });
        Server {
            handle: server_handle,
        }
    }
}

#[cfg(feature = "s2n")]
pub struct PskTlsServer {
    pub handle: thread::JoinHandle<()>,
}

#[cfg(feature = "s2n")]
impl PskTlsServer {
    pub fn start() -> Self {
        let server_handle = thread::spawn(|| {
            let rt = tokio::runtime::Runtime::new().unwrap();
            rt.block_on(Self::run_server());
        });
        PskTlsServer {
            handle: server_handle,
        }
    }

    async fn run_server() {
        use pingora_core::{protocols::tls::S2NConnectionBuilder, tls::TlsAcceptor};
        use pingora_core::{
            protocols::tls::{Psk, PskConfig, PskType},
            tls::{Config, PskHmac, S2NPolicy, DEFAULT_TLS13},
        };
        use tokio::net::TcpListener;

        let psk = Psk::new(
            TEST_PSK_IDENTITY.to_string(),
            TEST_PSK_SECRET.as_bytes().to_vec(),
            PskHmac::SHA256,
        );
        let psk_config = Arc::new(PskConfig::new(vec![psk]));

        let addr: std::net::SocketAddr = "127.0.0.1:6151".parse().unwrap();
        let listener = TcpListener::bind(addr).await.unwrap();
        let mut config_builder = Config::builder();
        unsafe {
            config_builder.disable_x509_verification();
        }
        config_builder.set_security_policy(&DEFAULT_TLS13).unwrap();
        let config = config_builder.build().unwrap();

        let connection_builder = S2NConnectionBuilder {
            config: config.clone(),
            psk_config: Some(psk_config.clone()),
            security_policy: None,
        };

        let acceptor = TlsAcceptor::new(connection_builder);

        loop {
            use tokio::{io::AsyncWriteExt, net::tcp};
            let (tcp_stream, _) = listener.accept().await.unwrap();
            let mut stream = acceptor.clone().accept(tcp_stream).await.unwrap();
            let response = b"HTTP/1.1 200 OK\r\nContent-Length: 5\r\n\r\nhello";
            stream.write(response).await.unwrap();
            stream.shutdown().await;
        }
    }
}

// FIXME: this still allows multiple servers to spawn across integration tests
pub static TEST_SERVER: Lazy<Server> = Lazy::new(Server::start);
#[cfg(feature = "s2n")]
pub static TEST_PSK_TLS_SERVER: Lazy<PskTlsServer> = Lazy::new(PskTlsServer::start);
use super::mock_origin::MOCK_ORIGIN;

pub fn init() {
    let _ = *TEST_SERVER;
    let _ = *MOCK_ORIGIN;
    #[cfg(feature = "s2n")]
    let _ = *TEST_PSK_TLS_SERVER;
}


================================================
FILE: pingora-proxy/tests/utils/websocket/mod.rs
================================================
mod ws_echo;
mod ws_echo_raw;

pub use ws_echo::WS_ECHO;
pub use ws_echo_raw::WS_ECHO_RAW;


================================================
FILE: pingora-proxy/tests/utils/websocket/ws_echo.rs
================================================
// Copyright 2025 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{io::Error, thread, time::Duration};

use futures_util::{SinkExt, StreamExt};
use log::debug;
use std::sync::LazyLock;
use tokio::{
    net::{TcpListener, TcpStream},
    runtime::Builder,
};

pub static WS_ECHO: LazyLock<bool> = LazyLock::new(init);
pub const WS_ECHO_ORIGIN_PORT: u16 = 9283;

fn init() -> bool {
    thread::spawn(move || {
        let runtime = Builder::new_current_thread()
            .thread_name("websocket echo")
            .enable_all()
            .build()
            .unwrap();
        runtime.block_on(async move {
            server(&format!("127.0.0.1:{WS_ECHO_ORIGIN_PORT}"))
                .await
                .unwrap();
        })
    });
    thread::sleep(Duration::from_millis(200));
    true
}

async fn server(addr: &str) -> Result<(), Error> {
    let listener = TcpListener::bind(&addr).await.unwrap();
    while let Ok((stream, _)) = listener.accept().await {
        tokio::spawn(handle_connection(stream));
    }
    Ok(())
}

async fn handle_connection(stream: TcpStream) {
    let mut ws_stream = tokio_tungstenite::accept_async(stream).await.unwrap();

    while let Some(msg) = ws_stream.next().await {
        let msg = msg.unwrap();
        let echo = msg.clone();
        if msg.is_text() {
            let data = msg.into_text().unwrap();
            if data.contains("close") {
                // abruptly close the stream without WS close;
                debug!("abrupt close");
                return;
            } else if data.contains("graceful") {
                debug!("graceful close");
                ws_stream.close(None).await.unwrap();
                // close() only sends frame
                return;
            } else {
                ws_stream.send(echo).await.unwrap();
            }
        }
    }
}


================================================
FILE: pingora-proxy/tests/utils/websocket/ws_echo_raw.rs
================================================
// Copyright 2025 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use std::{thread, time::Duration};

use futures_util::{SinkExt, StreamExt};
use log::debug;
use pingora_error::{Error, ErrorType::*, OrErr, Result};
use pingora_http::RequestHeader;
use std::sync::LazyLock;
use tokio::{
    io::{AsyncReadExt, AsyncWriteExt},
    net::{
        tcp::{OwnedReadHalf, OwnedWriteHalf},
        TcpListener, TcpStream,
    },
    runtime::Builder,
};

pub static WS_ECHO_RAW: LazyLock<bool> = LazyLock::new(init);
pub const WS_ECHO_RAW_ORIGIN_PORT: u16 = 9284;

fn init() -> bool {
    thread::spawn(move || {
        let runtime = Builder::new_current_thread()
            .thread_name("websocket raw echo")
            .enable_all()
            .build()
            .unwrap();
        runtime.block_on(async move {
            server(&format!("127.0.0.1:{WS_ECHO_RAW_ORIGIN_PORT}"))
                .await
                .unwrap();
        })
    });
    thread::sleep(Duration::from_millis(200));
    true
}

async fn server(addr: &str) -> Result<(), Error> {
    let listener = TcpListener::bind(&addr).await.unwrap();
    while let Ok((stream, _)) = listener.accept().await {
        tokio::spawn(handle_connection(stream));
    }
    Ok(())
}

async fn read_request_header(stream: &mut TcpStream) -> Result<(RequestHeader, Vec<u8>)> {
    fn parse_request_header(buf: &[u8]) -> Result<RequestHeader> {
        let mut headers = vec![httparse::EMPTY_HEADER; 256];
        let mut parsed = httparse::Request::new(&mut headers);
        match parsed
            .parse(buf)
            .or_err(ReadError, "request header parse error")?
        {
            httparse::Status::Complete(_) => {
                let mut req = RequestHeader::build(
                    parsed.method.unwrap_or(""),
                    parsed.path.unwrap_or("").as_bytes(),
                    Some(parsed.headers.len()),
                )?;
                for header in parsed.headers.iter() {
                    req.append_header(header.name.to_string(), header.value)
                        .unwrap();
                }
                Ok(req)
            }
            _ => Error::e_explain(ReadError, "should have full request header"),
        }
    }

    let mut request = vec![];
    let mut header_end = 0;
    let mut buf = [0; 1024];
    loop {
        let n = stream
            .read(&mut buf)
            .await
            .or_err(ReadError, "while reading request header")?;
        request.extend_from_slice(&buf[..n]);
        let mut end_of_header = false;
        for (i, w) in request.windows(4).enumerate() {
            if w == b"\r\n\r\n" {
                end_of_header = true;
                header_end = i + 4;
                break;
            }
        }
        if end_of_header {
            break;
        }
    }
    Ok((
        parse_request_header(&request[..header_end])?,
        request[header_end..].to_vec(),
    ))
}

async fn read_body_until_close(
    stream: &mut OwnedReadHalf,
) -> Result<Option<Vec<u8>>, std::io::Error> {
    let mut buf = [0; 1024];
    let n = stream.read(&mut buf).await?;
    if n == 0 {
        return Ok(None);
    }
    Ok(Some(buf[..n].to_vec()))
}

async fn write_body_until_close(
    stream: &mut OwnedWriteHalf,
    body: &[u8],
) -> Result<Option<usize>, std::io::Error> {
    let n = stream.write(body).await?;
    Ok((n != 0).then_some(n))
}

async fn handle_connection(mut stream: TcpStream) -> Result<()> {
    let (header, preread_body) = read_request_header(&mut stream).await?;

    // if x-expected-body-len unset, continue to read until stream is closed
    let expected_body_len = header
        .headers
        .get("x-expected-body-len")
        .and_then(|v| std::str::from_utf8(v.as_bytes()).ok())
        .and_then(|s| s.parse().ok());

    let resp_raw =
        b"HTTP/1.1 101 Switching Protocols\r\nConnection: upgrade\r\nUpgrade: websocket\r\n\r\n";
    stream
        .write_all(resp_raw)
        .await
        .or_err(WriteError, "while writing 101")?;

    let (mut stream_read, mut stream_write) = stream.into_split();
    let mut request_body = preread_body;
    let mut body_read = request_body.len();
    let mut body_read_done = false;

    loop {
        tokio::select! {
            res = read_body_until_close(&mut stream_read), if !body_read_done => {
                let Some(buf) = res.or_err(ReadError, "while reading body")? else {
                    return Ok(());
                };
                body_read += buf.len();
                body_read_done = expected_body_len.is_some_and(|len| body_read >= len);
                request_body.extend_from_slice(&buf[..]);
            }
            res = write_body_until_close(&mut stream_write, &request_body[..]), if !request_body.is_empty() => {
                let Some(n) = res.or_err(WriteError, "while writing body")? else {
                    return Ok(());
                };
                request_body = request_body[n..].to_vec();
            }
            else => break,
        }
    }
    if let Some(expected) = expected_body_len {
        if body_read > expected {
            return Error::e_explain(ReadError, "read {body_read} bytes, expected {expected}");
        }
    }
    Ok(())
}


================================================
FILE: pingora-runtime/Cargo.toml
================================================
[package]
name = "pingora-runtime"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "non-blocking", "pingora"]
description = """
Multithreaded Tokio runtime with the option of disabling work stealing.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_runtime"
path = "src/lib.rs"

[dependencies]
rand = "0.8"
tokio = { workspace = true, features = ["rt-multi-thread", "sync", "time"] }
once_cell = { workspace = true }
thread_local = "1"

[dev-dependencies]
tokio = { workspace = true, features = ["io-util", "net"] }

[[bench]]
name = "hello"
harness = false


================================================
FILE: pingora-runtime/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-runtime/benches/hello.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Pingora tokio runtime.
//!
//! Tokio runtime comes in two flavors: a single-threaded runtime
//! and a multi-threaded one which provides work stealing.
//! Benchmark shows that, compared to the single-threaded runtime, the multi-threaded one
//! has some overhead due to its more sophisticated work steal scheduling.
//!
//! This crate provides a third flavor: a multi-threaded runtime without work stealing.
//! This flavor is as efficient as the single-threaded runtime while allows the async
//! program to use multiple cores.

use pingora_runtime::{current_handle, Runtime};
use std::error::Error;
use std::{thread, time};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio::net::TcpListener;

async fn hello_server(port: usize) -> Result<(), Box<dyn Error + Send>> {
    let addr = format!("127.0.0.1:{port}");
    let listener = TcpListener::bind(&addr).await.unwrap();
    println!("Listening on: {}", addr);

    loop {
        let (mut socket, _) = listener.accept().await.unwrap();
        socket.set_nodelay(true).unwrap();
        let rt = current_handle();
        rt.spawn(async move {
            loop {
                let mut buf = [0; 1024];
                let res = socket.read(&mut buf).await;

                let n = match res {
                    Ok(n) => n,
                    Err(_) => return,
                };

                if n == 0 {
                    return;
                }

                let _ = socket
                    .write_all(
                        b"HTTP/1.1 200 OK\r\ncontent-length: 12\r\nconnection: keep-alive\r\n\r\nHello world!",
                    )
                    .await;
            }
        });
    }
}

/* On M1 macbook pro
wrk -t40 -c1000  -d10  http://127.0.0.1:3001  --latency
Running 10s test @ http://127.0.0.1:3001
  40 threads and 1000 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency     3.53ms    0.87ms  17.12ms   84.99%
    Req/Sec     7.09k     1.29k   33.11k    93.30%
  Latency Distribution
     50%    3.56ms
     75%    3.95ms
     90%    4.37ms
     99%    5.38ms
  2844034 requests in 10.10s, 203.42MB read
Requests/sec: 281689.27
Transfer/sec:     20.15MB

wrk -t40 -c1000  -d10  http://127.0.0.1:3000  --latency
Running 10s test @ http://127.0.0.1:3000
  40 threads and 1000 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency    12.16ms   16.29ms 112.29ms   83.40%
    Req/Sec     5.47k     2.01k   48.85k    83.67%
  Latency Distribution
     50%    2.09ms
     75%   20.23ms
     90%   37.11ms
     99%   65.16ms
  2190869 requests in 10.10s, 156.70MB read
Requests/sec: 216918.71
Transfer/sec:     15.52MB
*/

fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
    let rt = Runtime::new_steal(2, "");
    let handle = rt.get_handle();
    handle.spawn(hello_server(3000));
    let rt2 = Runtime::new_no_steal(2, "");
    let handle = rt2.get_handle();
    handle.spawn(hello_server(3001));
    thread::sleep(time::Duration::from_secs(999999999));
    Ok(())
}


================================================
FILE: pingora-runtime/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Pingora tokio runtime.
//!
//! Tokio runtime comes in two flavors: a single-threaded runtime
//! and a multi-threaded one which provides work stealing.
//! Benchmark shows that, compared to the single-threaded runtime, the multi-threaded one
//! has some overhead due to its more sophisticated work steal scheduling.
//!
//! This crate provides a third flavor: a multi-threaded runtime without work stealing.
//! This flavor is as efficient as the single-threaded runtime while allows the async
//! program to use multiple cores.

use once_cell::sync::{Lazy, OnceCell};
use rand::Rng;
use std::sync::Arc;
use std::thread::JoinHandle;
use std::time::Duration;
use thread_local::ThreadLocal;
use tokio::runtime::{Builder, Handle};
use tokio::sync::oneshot::{channel, Sender};

/// Pingora async multi-threaded runtime
///
/// The `Steal` flavor is effectively tokio multi-threaded runtime.
///
/// The `NoSteal` flavor is backed by multiple tokio single-threaded runtime.
pub enum Runtime {
    Steal(tokio::runtime::Runtime),
    NoSteal(NoStealRuntime),
}

impl Runtime {
    /// Create a `Steal` flavor runtime. This just a regular tokio runtime
    pub fn new_steal(threads: usize, name: &str) -> Self {
        Self::Steal(
            Builder::new_multi_thread()
                .enable_all()
                .worker_threads(threads)
                .thread_name(name)
                .build()
                .unwrap(),
        )
    }

    /// Create a `NoSteal` flavor runtime. This is backed by multiple tokio current-thread runtime
    pub fn new_no_steal(threads: usize, name: &str) -> Self {
        Self::NoSteal(NoStealRuntime::new(threads, name))
    }

    /// Return the &[Handle] of the [Runtime].
    /// For `Steal` flavor, it will just return the &[Handle].
    /// For `NoSteal` flavor, it will return the &[Handle] of a random thread in its pool.
    /// So if we want tasks to spawn on all the threads, call this function to get a fresh [Handle]
    /// for each async task.
    pub fn get_handle(&self) -> &Handle {
        match self {
            Self::Steal(r) => r.handle(),
            Self::NoSteal(r) => r.get_runtime(),
        }
    }

    /// Call tokio's `shutdown_timeout` of all the runtimes. This function is blocking until
    /// all runtimes exit.
    pub fn shutdown_timeout(self, timeout: Duration) {
        match self {
            Self::Steal(r) => r.shutdown_timeout(timeout),
            Self::NoSteal(r) => r.shutdown_timeout(timeout),
        }
    }
}

// only NoStealRuntime set the pools in thread threads
static CURRENT_HANDLE: Lazy<ThreadLocal<Pools>> = Lazy::new(ThreadLocal::new);

/// Return the [Handle] of current runtime.
/// If the current thread is under a `Steal` runtime, the current [Handle] is returned.
/// If the current thread is under a `NoSteal` runtime, the [Handle] of a random thread
/// under this runtime is returned. This function will panic if called outside any runtime.
pub fn current_handle() -> Handle {
    if let Some(pools) = CURRENT_HANDLE.get() {
        // safety: the CURRENT_HANDLE is set when the pool is being initialized in init_pools()
        let pools = pools.get().unwrap();
        let mut rng = rand::thread_rng();
        let index = rng.gen_range(0..pools.len());
        pools[index].clone()
    } else {
        // not NoStealRuntime, just check the current tokio runtime
        Handle::current()
    }
}

type Control = (Sender<Duration>, JoinHandle<()>);
type Pools = Arc<OnceCell<Box<[Handle]>>>;

/// Multi-threaded runtime backed by a pool of single threaded tokio runtime
pub struct NoStealRuntime {
    threads: usize,
    name: String,
    // Lazily init the runtimes so that they are created after pingora
    // daemonize itself. Otherwise the runtime threads are lost.
    pools: Pools,
    controls: OnceCell<Vec<Control>>,
}

impl NoStealRuntime {
    /// Create a new [NoStealRuntime]. Panic if `threads` is 0
    pub fn new(threads: usize, name: &str) -> Self {
        assert!(threads != 0);
        NoStealRuntime {
            threads,
            name: name.to_string(),
            pools: Arc::new(OnceCell::new()),
            controls: OnceCell::new(),
        }
    }

    fn init_pools(&self) -> (Box<[Handle]>, Vec<Control>) {
        let mut pools = Vec::with_capacity(self.threads);
        let mut controls = Vec::with_capacity(self.threads);
        for _ in 0..self.threads {
            let rt = Builder::new_current_thread().enable_all().build().unwrap();
            let handler = rt.handle().clone();
            let (tx, rx) = channel::<Duration>();
            let pools_ref = self.pools.clone();
            let join = std::thread::Builder::new()
                .name(self.name.clone())
                .spawn(move || {
                    CURRENT_HANDLE.get_or(|| pools_ref);
                    if let Ok(timeout) = rt.block_on(rx) {
                        rt.shutdown_timeout(timeout);
                    } // else Err(_): tx is dropped, just exit
                })
                .unwrap();
            pools.push(handler);
            controls.push((tx, join));
        }

        (pools.into_boxed_slice(), controls)
    }

    /// Return the &[Handle] of a random thread of this runtime
    pub fn get_runtime(&self) -> &Handle {
        let mut rng = rand::thread_rng();

        let index = rng.gen_range(0..self.threads);
        self.get_runtime_at(index)
    }

    /// Return the number of threads of this runtime
    pub fn threads(&self) -> usize {
        self.threads
    }

    fn get_pools(&self) -> &[Handle] {
        if let Some(p) = self.pools.get() {
            p
        } else {
            // TODO: use a mutex to avoid creating a lot threads only to drop them
            let (pools, controls) = self.init_pools();
            // there could be another thread racing with this one to init the pools
            match self.pools.try_insert(pools) {
                Ok(p) => {
                    // unwrap to make sure that this is the one that init both pools and controls
                    self.controls.set(controls).unwrap();
                    p
                }
                // another thread already set it, just return it
                Err((p, _my_pools)) => p,
            }
        }
    }

    /// Return the &[Handle] of a given thread of this runtime
    pub fn get_runtime_at(&self, index: usize) -> &Handle {
        let pools = self.get_pools();
        &pools[index]
    }

    /// Call tokio's `shutdown_timeout` of all the runtimes. This function is blocking until
    /// all runtimes exit.
    pub fn shutdown_timeout(mut self, timeout: Duration) {
        if let Some(controls) = self.controls.take() {
            let (txs, joins): (Vec<Sender<_>>, Vec<JoinHandle<()>>) = controls.into_iter().unzip();
            for tx in txs {
                let _ = tx.send(timeout); // Err() when rx is dropped
            }
            for join in joins {
                let _ = join.join(); // ignore thread error
            }
        } // else, the controls and the runtimes are not even init yet, just return;
    }

    // TODO: runtime metrics
}

#[test]
fn test_steal_runtime() {
    use tokio::time::{sleep, Duration};
    let threads = 2;
    let rt = Runtime::new_steal(threads, "test");
    let handle = rt.get_handle();
    let ret = handle.block_on(async {
        sleep(Duration::from_secs(1)).await;
        let handle = current_handle();
        let join = handle.spawn(async {
            sleep(Duration::from_secs(1)).await;
        });
        join.await.unwrap();
        1
    });

    #[cfg(target_os = "linux")]
    assert_eq!(handle.metrics().num_workers(), threads);
    assert_eq!(ret, 1);
}

#[test]
fn test_no_steal_runtime() {
    use tokio::time::{sleep, Duration};

    let rt = Runtime::new_no_steal(2, "test");
    let handle = rt.get_handle();
    let ret = handle.block_on(async {
        sleep(Duration::from_secs(1)).await;
        let handle = current_handle();
        let join = handle.spawn(async {
            sleep(Duration::from_secs(1)).await;
        });
        join.await.unwrap();
        1
    });

    assert_eq!(ret, 1);
}

#[test]
fn test_no_steal_shutdown() {
    use tokio::time::{sleep, Duration};

    let rt = Runtime::new_no_steal(2, "test");
    let handle = rt.get_handle();
    let ret = handle.block_on(async {
        sleep(Duration::from_secs(1)).await;
        let handle = current_handle();
        let join = handle.spawn(async {
            sleep(Duration::from_secs(1)).await;
        });
        join.await.unwrap();
        1
    });
    assert_eq!(ret, 1);

    rt.shutdown_timeout(Duration::from_secs(1));
}


================================================
FILE: pingora-rustls/Cargo.toml
================================================
[package]
name = "pingora-rustls"
version = "0.8.0"
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "tls", "ssl", "pingora"]
description = """
RusTLS async APIs for Pingora.
"""

[lib]
name = "pingora_rustls"
path = "src/lib.rs"

[dependencies]
log = "0.4.21"
pingora-error = { version = "0.8.0", path = "../pingora-error"}
ring = "0.17.12"
rustls = "0.23.12"
rustls-native-certs = "0.7.1"
rustls-pemfile = "2.1.2"
rustls-pki-types = "1.7.0"
tokio-rustls = "0.26.0"
no_debug = "3.1.0"


================================================
FILE: pingora-rustls/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-rustls/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! This module contains all the rustls specific pingora integration for things
//! like loading certificates and private keys

#![warn(clippy::all)]

use std::fs::File;
use std::io::BufReader;
use std::path::Path;

use log::warn;
pub use no_debug::{Ellipses, NoDebug, WithTypeInfo};
use pingora_error::{Error, ErrorType, OrErr, Result};

pub use rustls::server::danger::{ClientCertVerified, ClientCertVerifier};
pub use rustls::server::{ClientCertVerifierBuilder, WebPkiClientVerifier};
pub use rustls::{
    client::WebPkiServerVerifier, version, CertificateError, ClientConfig, DigitallySignedStruct,
    Error as RusTlsError, KeyLogFile, RootCertStore, ServerConfig, SignatureScheme, Stream,
};
pub use rustls_native_certs::load_native_certs;
use rustls_pemfile::Item;
pub use rustls_pki_types::{CertificateDer, PrivateKeyDer, ServerName, UnixTime};
pub use tokio_rustls::client::TlsStream as ClientTlsStream;
pub use tokio_rustls::server::TlsStream as ServerTlsStream;
pub use tokio_rustls::{Accept, Connect, TlsAcceptor, TlsConnector, TlsStream};

// This allows to skip certificate verification. Be highly cautious.
pub use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier};

/// Load the given file from disk as a buffered reader and use the pingora Error
/// type instead of the std::io version
fn load_file<P>(path: P) -> Result<BufReader<File>>
where
    P: AsRef<Path>,
{
    File::open(path)
        .or_err(ErrorType::FileReadError, "Failed to load file")
        .map(BufReader::new)
}

/// Read the pem file at the given path from disk
fn load_pem_file<P>(path: P) -> Result<Vec<Item>>
where
    P: AsRef<Path>,
{
    rustls_pemfile::read_all(&mut load_file(path)?)
        .map(|item_res| {
            item_res.or_err(
                ErrorType::InvalidCert,
                "Certificate in pem file could not be read",
            )
        })
        .collect()
}

/// Load the certificates from the given pem file path into the given
/// certificate store
pub fn load_ca_file_into_store<P>(path: P, cert_store: &mut RootCertStore) -> Result<()>
where
    P: AsRef<Path>,
{
    for pem_item in load_pem_file(path)? {
        // only loading certificates, handling a CA file
        let Item::X509Certificate(content) = pem_item else {
            return Error::e_explain(
                ErrorType::InvalidCert,
                "Pem file contains un-loadable certificate type",
            );
        };
        cert_store.add(content).or_err(
            ErrorType::InvalidCert,
            "Failed to load X509 certificate into root store",
        )?;
    }

    Ok(())
}

/// Attempt to load the native cas into the given root-certificate store
pub fn load_platform_certs_incl_env_into_store(ca_certs: &mut RootCertStore) -> Result<()> {
    // this includes handling of ENV vars SSL_CERT_FILE & SSL_CERT_DIR
    for cert in load_native_certs()
        .or_err(ErrorType::InvalidCert, "Failed to load native certificates")?
        .into_iter()
    {
        ca_certs.add(cert).or_err(
            ErrorType::InvalidCert,
            "Failed to load native certificate into root store",
        )?;
    }

    Ok(())
}

/// Load the certificates and private key files
pub fn load_certs_and_key_files<'a>(
    cert: &str,
    key: &str,
) -> Result<Option<(Vec<CertificateDer<'a>>, PrivateKeyDer<'a>)>> {
    let certs_file = load_pem_file(cert)?;
    let key_file = load_pem_file(key)?;

    let certs = certs_file
        .into_iter()
        .filter_map(|item| {
            if let Item::X509Certificate(cert) = item {
                Some(cert)
            } else {
                None
            }
        })
        .collect::<Vec<_>>();

    // These are the currently supported pk types -
    // [https://doc.servo.org/rustls/key/struct.PrivateKey.html]
    let private_key_opt = key_file
        .into_iter()
        .filter_map(|key_item| match key_item {
            Item::Pkcs1Key(key) => Some(PrivateKeyDer::from(key)),
            Item::Pkcs8Key(key) => Some(PrivateKeyDer::from(key)),
            Item::Sec1Key(key) => Some(PrivateKeyDer::from(key)),
            _ => None,
        })
        .next();

    if let (Some(private_key), false) = (private_key_opt, certs.is_empty()) {
        Ok(Some((certs, private_key)))
    } else {
        Ok(None)
    }
}

/// Load the certificate
pub fn load_pem_file_ca(path: &String) -> Result<Vec<u8>> {
    let mut reader = load_file(path)?;
    let cas_file_items = rustls_pemfile::certs(&mut reader)
        .map(|item_res| {
            item_res.or_err(
                ErrorType::InvalidCert,
                "Failed to load certificate from file",
            )
        })
        .collect::<Result<Vec<_>>>()?;

    Ok(cas_file_items
        .first()
        .map(|ca| ca.to_vec())
        .unwrap_or_default())
}

pub fn load_pem_file_private_key(path: &String) -> Result<Vec<u8>> {
    Ok(rustls_pemfile::private_key(&mut load_file(path)?)
        .or_err(
            ErrorType::InvalidCert,
            "Failed to load private key from file",
        )?
        .map(|key| key.secret_der().to_vec())
        .unwrap_or_default())
}

pub fn hash_certificate(cert: &CertificateDer) -> Vec<u8> {
    let hash = ring::digest::digest(&ring::digest::SHA256, cert.as_ref());
    hash.as_ref().to_vec()
}


================================================
FILE: pingora-s2n/Cargo.toml
================================================
[package]
name = "pingora-s2n"
version = "0.8.0"
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous", "network-programming"]
keywords = ["async", "tls", "ssl", "pingora"]
description = """
S2N async APIs for Pingora.
"""

[lib]
name = "pingora_s2n"
path = "src/lib.rs"

[dependencies]
pingora-error = { version = "0.8.0", path = "../pingora-error"}
ring = "0.17.12"
s2n-tls = "0.3"
s2n-tls-tokio = "0.3"


================================================
FILE: pingora-s2n/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-s2n/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applijable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use pingora_error::{Error, ErrorType, Result};
use std::fs;

pub use s2n_tls::{
    callbacks::VerifyHostNameCallback,
    config::{Builder as ConfigBuilder, Config},
    connection::{Builder as ConnectionBuilder, Connection},
    enums::{ClientAuthType, Mode, PskHmac},
    error::Error as S2NError,
    psk::Psk,
    security::{Policy as S2NPolicy, DEFAULT_TLS13},
};
pub use s2n_tls_tokio::{TlsAcceptor, TlsConnector, TlsStream};

pub fn load_certs_and_key_files(cert_file: &str, key_file: &str) -> Result<(Vec<u8>, Vec<u8>)> {
    let cert_bytes = load_pem_file(cert_file)?;
    let key_bytes = load_pem_file(key_file)?;
    Ok((cert_bytes, key_bytes))
}

pub fn load_pem_file(file: &str) -> Result<Vec<u8>> {
    if let Ok(bytes) = fs::read(file) {
        Ok(bytes)
    } else {
        Error::e_explain(
            ErrorType::InvalidCert,
            "Certificate in pem file could not be read",
        )
    }
}

pub fn hash_certificate(cert: &[u8]) -> Vec<u8> {
    let hash = ring::digest::digest(&ring::digest::SHA256, cert);
    hash.as_ref().to_vec()
}

/// Verify host name callback that always returns a success,
/// effectively ignoring hostname validation
pub struct IgnoreVerifyHostnameCallback {}

impl IgnoreVerifyHostnameCallback {
    pub fn new() -> Self {
        IgnoreVerifyHostnameCallback {}
    }
}

impl Default for IgnoreVerifyHostnameCallback {
    fn default() -> Self {
        Self::new()
    }
}

impl VerifyHostNameCallback for IgnoreVerifyHostnameCallback {
    fn verify_host_name(&self, _host_name: &str) -> bool {
        true
    }
}


================================================
FILE: pingora-timeout/Cargo.toml
================================================
[package]
name = "pingora-timeout"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
license = "Apache-2.0"
edition = "2021"
repository = "https://github.com/cloudflare/pingora"
categories = ["asynchronous"]
keywords = ["async", "non-blocking", "pingora"]
description = """
Highly efficient async timer and timeout system for Tokio runtimes.
"""

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "pingora_timeout"
path = "src/lib.rs"

[dependencies]
tokio = { workspace = true, features = [
    "time",
    "rt-multi-thread",
    "macros",
    "sync",
] }
pin-project-lite = "0.2"
once_cell = { workspace = true }
parking_lot = "0.12"
thread_local = "1.0"

[dev-dependencies]
bencher = "0.1.5"

[[bench]]
name = "benchmark"
harness = false


================================================
FILE: pingora-timeout/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: pingora-timeout/benches/benchmark.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use pingora_timeout::*;
use std::time::{Duration, Instant};
use tokio::time::sleep;
use tokio::time::timeout as tokio_timeout;

const LOOP_SIZE: u32 = 100000;

async fn bench_timeout() -> u32 {
    let mut n = 0;
    for _ in 0..LOOP_SIZE {
        let fut = async { 1 };
        let to = timeout(Duration::from_secs(1), fut);
        n += to.await.unwrap();
    }
    n
}

async fn bench_tokio_timeout() -> u32 {
    let mut n = 0;
    for _ in 0..LOOP_SIZE {
        let fut = async { 1 };
        let to = tokio_timeout(Duration::from_secs(1), fut);
        n += to.await.unwrap();
    }
    n
}

async fn bench_fast_timeout() -> u32 {
    let mut n = 0;
    for _ in 0..LOOP_SIZE {
        let fut = async { 1 };
        let to = fast_timeout::fast_timeout(Duration::from_secs(1), fut);
        n += to.await.unwrap();
    }
    n
}

fn bench_tokio_timer() {
    let mut list = Vec::with_capacity(LOOP_SIZE as usize);
    let before = Instant::now();
    for _ in 0..LOOP_SIZE {
        list.push(sleep(Duration::from_secs(1)));
    }
    let elapsed = before.elapsed();
    println!(
        "tokio timer create {:?} total, {:?} avg per iteration",
        elapsed,
        elapsed / LOOP_SIZE
    );

    let before = Instant::now();
    drop(list);
    let elapsed = before.elapsed();
    println!(
        "tokio timer drop {:?} total, {:?} avg per iteration",
        elapsed,
        elapsed / LOOP_SIZE
    );
}

async fn bench_multi_thread_tokio_timer(threads: usize) {
    let mut handlers = vec![];
    for _ in 0..threads {
        let handler = tokio::spawn(async {
            bench_tokio_timer();
        });
        handlers.push(handler);
    }
    for thread in handlers {
        thread.await.unwrap();
    }
}

use std::sync::Arc;

async fn bench_multi_thread_timer(threads: usize, tm: Arc<TimerManager>) {
    let mut handlers = vec![];
    for _ in 0..threads {
        let tm_ref = tm.clone();
        let handler = tokio::spawn(async move {
            bench_timer(&tm_ref);
        });
        handlers.push(handler);
    }
    for thread in handlers {
        thread.await.unwrap();
    }
}

use pingora_timeout::timer::TimerManager;

fn bench_timer(tm: &TimerManager) {
    let mut list = Vec::with_capacity(LOOP_SIZE as usize);
    let before = Instant::now();
    for _ in 0..LOOP_SIZE {
        list.push(tm.register_timer(Duration::from_secs(1)));
    }
    let elapsed = before.elapsed();
    println!(
        "pingora timer create {:?} total, {:?} avg per iteration",
        elapsed,
        elapsed / LOOP_SIZE
    );

    let before = Instant::now();
    drop(list);
    let elapsed = before.elapsed();
    println!(
        "pingora timer drop {:?} total, {:?} avg per iteration",
        elapsed,
        elapsed / LOOP_SIZE
    );
}

#[tokio::main(worker_threads = 4)]
async fn main() {
    let before = Instant::now();
    bench_timeout().await;
    let elapsed = before.elapsed();
    println!(
        "pingora timeout {:?} total, {:?} avg per iteration",
        elapsed,
        elapsed / LOOP_SIZE
    );

    let before = Instant::now();
    bench_fast_timeout().await;
    let elapsed = before.elapsed();
    println!(
        "pingora fast timeout {:?} total, {:?} avg per iteration",
        elapsed,
        elapsed / LOOP_SIZE
    );

    let before = Instant::now();
    bench_tokio_timeout().await;
    let elapsed = before.elapsed();
    println!(
        "tokio timeout {:?} total, {:?} avg per iteration",
        elapsed,
        elapsed / LOOP_SIZE
    );

    println!("===========================");

    let tm = pingora_timeout::timer::TimerManager::new();
    bench_timer(&tm);
    bench_tokio_timer();

    println!("===========================");

    let tm = Arc::new(tm);
    bench_multi_thread_timer(4, tm).await;
    bench_multi_thread_tokio_timer(4).await;
}


================================================
FILE: pingora-timeout/src/fast_timeout.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! The fast and more complicated version of pingora-timeout
//!
//! The following optimizations are applied:
//! - The timeouts lazily initialize their timer when the Future is pending for the first time.
//! - There is no global lock for creating and cancelling timeouts.
//! - Timeout timers are rounded to the next 10ms tick and timers are shared across all timeouts with the same deadline.
//!
//! In order for this to work, a standalone thread is created to arm the timers, which has some
//! overhead. As a general rule, the benefits of this don't outweigh the overhead unless
//! there are more than about 100 timeout() calls/sec in the system. Use regular tokio timeout or
//! [super::tokio_timeout] in the low usage case.

use super::timer::*;
use super::*;
use once_cell::sync::Lazy;
use std::sync::Arc;

static TIMER_MANAGER: Lazy<Arc<TimerManager>> = Lazy::new(|| {
    let tm = Arc::new(TimerManager::new());
    check_clock_thread(&tm);
    tm
});

fn check_clock_thread(tm: &Arc<TimerManager>) {
    if tm.should_i_start_clock() {
        std::thread::Builder::new()
            .name("Timer thread".into())
            .spawn(|| TIMER_MANAGER.clock_thread())
            .unwrap();
    }
}

/// The timeout generated by [fast_timeout()].
///
/// Users don't need to interact with this object.
pub struct FastTimeout(Duration);

impl ToTimeout for FastTimeout {
    fn timeout(&self) -> Pin<Box<dyn Future<Output = ()> + Send + Sync>> {
        Box::pin(TIMER_MANAGER.register_timer(self.0).poll())
    }

    fn create(d: Duration) -> Self {
        FastTimeout(d)
    }
}

/// Similar to [tokio::time::timeout] but more efficient.
pub fn fast_timeout<T>(duration: Duration, future: T) -> Timeout<T, FastTimeout>
where
    T: Future,
{
    check_clock_thread(&TIMER_MANAGER);
    Timeout::new_with_delay(future, duration)
}

/// Similar to [tokio::time::sleep] but more efficient.
pub async fn fast_sleep(duration: Duration) {
    check_clock_thread(&TIMER_MANAGER);
    TIMER_MANAGER.register_timer(duration).poll().await
}

/// Pause the timer for fork()
///
/// Because RwLock across fork() is undefined behavior, this function makes sure that no one
/// holds any locks.
///
/// This function should be called right before fork().
pub fn pause_for_fork() {
    TIMER_MANAGER.pause_for_fork();
}

/// Unpause the timer after fork()
///
/// This function should be called right after fork().
pub fn unpause() {
    TIMER_MANAGER.unpause();
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_timeout() {
        let fut = tokio_sleep(Duration::from_secs(1000));
        let to = fast_timeout(Duration::from_secs(1), fut);
        assert!(to.await.is_err())
    }

    #[tokio::test]
    async fn test_instantly_return() {
        let fut = async { 1 };
        let to = fast_timeout(Duration::from_secs(1), fut);
        assert_eq!(to.await.unwrap(), 1)
    }

    #[tokio::test]
    async fn test_delayed_return() {
        let fut = async {
            tokio_sleep(Duration::from_secs(1)).await;
            1
        };
        let to = fast_timeout(Duration::from_secs(1000), fut);
        assert_eq!(to.await.unwrap(), 1)
    }

    #[tokio::test]
    async fn test_sleep() {
        let fut = async {
            fast_sleep(Duration::from_secs(1)).await;
            1
        };
        let to = fast_timeout(Duration::from_secs(1000), fut);
        assert_eq!(to.await.unwrap(), 1)
    }
}


================================================
FILE: pingora-timeout/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#![warn(clippy::all)]

//! A drop-in replacement of [tokio::time::timeout] which is much more efficient.
//!
//! Similar to [tokio::time::timeout] but more efficient on busy concurrent IOs where timeouts are
//! created and canceled very frequently.
//!
//! This crate provides the following optimizations
//! - The timeouts lazily initializes their timer when the Future is pending for the first time.
//! - There is no global lock for creating and cancelling timeouts.
//! - Timeout timers are rounded to the next 10ms tick and timers are shared across all timeouts with the same deadline.
//!
//! Benchmark:
//!
//! 438.302µs total, 4ns avg per iteration
//!
//! v.s. Tokio timeout():
//!
//! 10.716192ms total, 107ns avg per iteration
//!

pub mod fast_timeout;
pub mod timer;

pub use fast_timeout::fast_sleep as sleep;
pub use fast_timeout::fast_timeout as timeout;

use pin_project_lite::pin_project;
use std::future::Future;
use std::pin::Pin;
use std::task::{self, Poll};
use tokio::time::{sleep as tokio_sleep, Duration};

/// The interface to start a timeout
///
/// Users don't need to interact with this trait
pub trait ToTimeout {
    fn timeout(&self) -> Pin<Box<dyn Future<Output = ()> + Send + Sync>>;
    fn create(d: Duration) -> Self;
}

/// The timeout generated by [tokio_timeout()].
///
/// Users don't need to interact with this object.
pub struct TokioTimeout(Duration);

impl ToTimeout for TokioTimeout {
    fn timeout(&self) -> Pin<Box<dyn Future<Output = ()> + Send + Sync>> {
        Box::pin(tokio_sleep(self.0))
    }

    fn create(d: Duration) -> Self {
        TokioTimeout(d)
    }
}

/// The error type returned when the timeout is reached.
#[derive(Debug)]
pub struct Elapsed;

impl std::fmt::Display for Elapsed {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Timeout Elapsed")
    }
}

impl std::error::Error for Elapsed {}

/// The [tokio::time::timeout] with just lazy timer initialization.
///
/// The timer is created the first time the `future` is pending. This avoids unnecessary timer
/// creation and cancellation on busy IOs with a good chance to be already ready (e.g., reading
/// data from TCP where the recv buffer already has a lot of data to read right away).
pub fn tokio_timeout<T>(duration: Duration, future: T) -> Timeout<T, TokioTimeout>
where
    T: Future,
{
    Timeout::<T, TokioTimeout>::new_with_delay(future, duration)
}

pin_project! {
    /// The timeout future returned by the timeout functions
    #[must_use = "futures do nothing unless you `.await` or poll them"]
    pub struct Timeout<T, F> {
        #[pin]
        value: T,
        #[pin]
        delay: Option<Pin<Box<dyn Future<Output = ()> + Send + Sync>>>,
        callback: F, // callback to create the timer
    }
}

impl<T, F> Timeout<T, F>
where
    F: ToTimeout,
{
    pub(crate) fn new_with_delay(value: T, d: Duration) -> Timeout<T, F> {
        Timeout {
            value,
            delay: None,
            callback: F::create(d),
        }
    }
}

impl<T, F> Future for Timeout<T, F>
where
    T: Future,
    F: ToTimeout,
{
    type Output = Result<T::Output, Elapsed>;

    fn poll(self: Pin<&mut Self>, cx: &mut task::Context<'_>) -> Poll<Self::Output> {
        let mut me = self.project();

        // First, try polling the future
        if let Poll::Ready(v) = me.value.poll(cx) {
            return Poll::Ready(Ok(v));
        }

        let delay = me
            .delay
            .get_or_insert_with(|| Box::pin(me.callback.timeout()));

        match delay.as_mut().poll(cx) {
            Poll::Pending => Poll::Pending,
            Poll::Ready(()) => Poll::Ready(Err(Elapsed {})),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_timeout() {
        let fut = tokio_sleep(Duration::from_secs(1000));
        let to = timeout(Duration::from_secs(1), fut);
        assert!(to.await.is_err())
    }

    #[tokio::test]
    async fn test_instantly_return() {
        let fut = async { 1 };
        let to = timeout(Duration::from_secs(1), fut);
        assert_eq!(to.await.unwrap(), 1)
    }

    #[tokio::test]
    async fn test_delayed_return() {
        let fut = async {
            tokio_sleep(Duration::from_secs(1)).await;
            1
        };
        let to = timeout(Duration::from_secs(1000), fut);
        assert_eq!(to.await.unwrap(), 1)
    }
}


================================================
FILE: pingora-timeout/src/timer.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Lightweight timer for systems with high rate of operations with timeout
//! associated with them
//!
//! Users don't need to interact with this module.
//!
//! The idea is to bucket timers into finite time slots so that operations that
//! start and end quickly don't have to create their own timers all the time
//!
//! Benchmark:
//! - create 7.809622ms total, 78ns avg per iteration
//! - drop: 1.348552ms total, 13ns avg per iteration
//!
//! tokio timer:
//! - create 34.317439ms total, 343ns avg per iteration
//! - drop: 10.694154ms total, 106ns avg per iteration

use parking_lot::RwLock;
use std::collections::BTreeMap;
use std::sync::atomic::{AtomicBool, AtomicI64, Ordering};
use std::sync::Arc;
use std::time::{Duration, Instant};
use thread_local::ThreadLocal;
use tokio::sync::Notify;

const RESOLUTION_MS: u64 = 10;
const RESOLUTION_DURATION: Duration = Duration::from_millis(RESOLUTION_MS);

// round to the NEXT timestamp based on the resolution
#[inline]
fn round_to(raw: u128, resolution: u128) -> u128 {
    raw - 1 + resolution - (raw - 1) % resolution
}
// millisecond resolution as most
#[derive(PartialEq, PartialOrd, Eq, Ord, Clone, Copy, Debug)]
struct Time(u128);

impl From<u128> for Time {
    fn from(raw_ms: u128) -> Self {
        Time(round_to(raw_ms, RESOLUTION_MS as u128))
    }
}

impl From<Duration> for Time {
    fn from(d: Duration) -> Self {
        Time(round_to(d.as_millis(), RESOLUTION_MS as u128))
    }
}

impl Time {
    pub fn not_after(&self, ts: u128) -> bool {
        self.0 <= ts
    }
}

/// the stub for waiting for a timer to be expired.
pub struct TimerStub(Arc<Notify>, Arc<AtomicBool>);

impl TimerStub {
    /// Wait for the timer to expire.
    pub async fn poll(self) {
        if self.1.load(Ordering::SeqCst) {
            return;
        }
        self.0.notified().await;
    }
}

struct Timer(Arc<Notify>, Arc<AtomicBool>);

impl Timer {
    pub fn new() -> Self {
        Timer(Arc::new(Notify::new()), Arc::new(AtomicBool::new(false)))
    }

    pub fn fire(&self) {
        self.1.store(true, Ordering::SeqCst);
        self.0.notify_waiters();
    }

    pub fn subscribe(&self) -> TimerStub {
        TimerStub(self.0.clone(), self.1.clone())
    }
}

/// The object that holds all the timers registered to it.
pub struct TimerManager {
    // each thread insert into its local timer tree to avoid lock contention
    timers: ThreadLocal<RwLock<BTreeMap<Time, Timer>>>,
    zero: Instant, // the reference zero point of Timestamp
    // Start a new clock thread if this is -1 or staled. The clock thread should keep updating this
    clock_watchdog: AtomicI64,
    paused: AtomicBool,
}

// Consider the clock thread is dead after it fails to update the thread in DELAYS_SEC
const DELAYS_SEC: i64 = 2; // TODO: make sure this value is larger than RESOLUTION_DURATION

impl Default for TimerManager {
    fn default() -> Self {
        TimerManager {
            timers: ThreadLocal::new(),
            zero: Instant::now(),
            clock_watchdog: AtomicI64::new(-DELAYS_SEC),
            paused: AtomicBool::new(false),
        }
    }
}

impl TimerManager {
    /// Create a new [TimerManager]
    pub fn new() -> Self {
        Self::default()
    }

    // This thread sleeps for a resolution time and then fires all the timers that are due to fire
    pub(crate) fn clock_thread(&self) {
        loop {
            std::thread::sleep(RESOLUTION_DURATION);
            let now = Instant::now() - self.zero;
            self.clock_watchdog
                .store(now.as_secs() as i64, Ordering::Relaxed);
            if self.is_paused_for_fork() {
                // just stop acquiring the locks, waiting for fork to happen
                continue;
            }
            let now = now.as_millis();
            // iterate through the timer tree for all threads
            for thread_timer in self.timers.iter() {
                let mut timers = thread_timer.write();
                // Fire all timers until now
                loop {
                    let key_to_remove = timers.iter().next().and_then(|(k, _)| {
                        if k.not_after(now) {
                            Some(*k)
                        } else {
                            None
                        }
                    });
                    if let Some(k) = key_to_remove {
                        let timer = timers.remove(&k);
                        // safe to unwrap, the key is from iter().next()
                        timer.unwrap().fire();
                    } else {
                        break;
                    }
                }
                // write lock drops here
            }
        }
    }

    // False if the clock is already started
    // If true, the caller must start the clock thread next
    pub(crate) fn should_i_start_clock(&self) -> bool {
        let Err(prev) = self.is_clock_running() else {
            return false;
        };
        let now = Instant::now().duration_since(self.zero).as_secs() as i64;
        let res =
            self.clock_watchdog
                .compare_exchange(prev, now, Ordering::SeqCst, Ordering::SeqCst);
        res.is_ok()
    }

    // Ok(()) if clock is running (watch dog is within DELAYS_SEC of now)
    // Err(time) if watch do stopped at `time`
    pub(crate) fn is_clock_running(&self) -> Result<(), i64> {
        let now = Instant::now().duration_since(self.zero).as_secs() as i64;
        let prev = self.clock_watchdog.load(Ordering::SeqCst);
        if now < prev + DELAYS_SEC {
            Ok(())
        } else {
            Err(prev)
        }
    }

    /// Register a timer.
    ///
    /// When the timer expires, the [TimerStub] will be notified.
    pub fn register_timer(&self, duration: Duration) -> TimerStub {
        if self.is_paused_for_fork() {
            // Return a dummy TimerStub that will trigger right away.
            // This is fine assuming pause_for_fork() is called right before fork().
            // The only possible register_timer() is from another thread which will
            // be entirely lost after fork()
            // TODO: buffer these register calls instead (without a lock)
            let timer = Timer::new();
            timer.fire();
            return timer.subscribe();
        }
        let now: Time = (Instant::now() + duration - self.zero).into();
        {
            let timers = self.timers.get_or(|| RwLock::new(BTreeMap::new())).read();
            if let Some(t) = timers.get(&now) {
                return t.subscribe();
            }
        } // drop read lock

        let timer = Timer::new();
        let mut timers = self.timers.get_or(|| RwLock::new(BTreeMap::new())).write();
        // Usually we check if another thread has insert the same node before we get the write lock,
        // but because only this thread will insert anything to its local timers tree, there
        // is no possible race that can happen. The only other thread is the clock thread who
        // only removes timer from the tree
        let stub = timer.subscribe();
        timers.insert(now, timer);
        stub
    }

    fn is_paused_for_fork(&self) -> bool {
        self.paused.load(Ordering::SeqCst)
    }

    /// Pause the timer for fork()
    ///
    /// Because RwLock across fork() is undefined behavior, this function makes sure that no one
    /// holds any locks.
    ///
    /// This function should be called right before fork().
    pub fn pause_for_fork(&self) {
        self.paused.store(true, Ordering::SeqCst);
        // wait for everything to get out of their locks
        std::thread::sleep(RESOLUTION_DURATION * 2);
    }

    /// Unpause the timer after fork()
    ///
    /// This function should be called right after fork().
    pub fn unpause(&self) {
        self.paused.store(false, Ordering::SeqCst)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_round() {
        assert_eq!(round_to(30, 10), 30);
        assert_eq!(round_to(31, 10), 40);
        assert_eq!(round_to(29, 10), 30);
    }

    #[test]
    fn test_time() {
        let t: Time = 128.into(); // t will round to 130
        assert_eq!(t, Duration::from_millis(130).into());
        assert!(!t.not_after(128));
        assert!(!t.not_after(129));
        assert!(t.not_after(130));
        assert!(t.not_after(131));
    }

    #[tokio::test]
    async fn test_timer_manager() {
        let tm_a = Arc::new(TimerManager::new());
        let tm = tm_a.clone();
        std::thread::spawn(move || tm_a.clock_thread());

        let now = Instant::now();
        let t1 = tm.register_timer(Duration::from_secs(1));
        let t2 = tm.register_timer(Duration::from_secs(1));
        t1.poll().await;
        assert_eq!(now.elapsed().as_secs(), 1);
        let now = Instant::now();
        t2.poll().await;
        // t2 fired along t1 so no extra wait time
        assert_eq!(now.elapsed().as_secs(), 0);
    }

    #[test]
    fn test_timer_manager_start_check() {
        let tm = Arc::new(TimerManager::new());
        assert!(tm.should_i_start_clock());
        assert!(!tm.should_i_start_clock());
        assert!(tm.is_clock_running().is_ok());
    }

    #[test]
    fn test_timer_manager_watchdog() {
        let tm = Arc::new(TimerManager::new());
        assert!(tm.should_i_start_clock());
        assert!(!tm.should_i_start_clock());

        // we don't actually start the clock thread, sleep for the watchdog to expire
        std::thread::sleep(Duration::from_secs(DELAYS_SEC as u64 + 1));
        assert!(tm.is_clock_running().is_err());
        assert!(tm.should_i_start_clock());
    }

    #[tokio::test]
    async fn test_timer_manager_pause() {
        let tm_a = Arc::new(TimerManager::new());
        let tm = tm_a.clone();
        std::thread::spawn(move || tm_a.clock_thread());

        let now = Instant::now();
        let t1 = tm.register_timer(Duration::from_secs(2));
        tm.pause_for_fork();
        // no actual fork happen, we just test that pause and unpause work

        // any timer in this critical section is timed out right away
        let t2 = tm.register_timer(Duration::from_secs(2));
        t2.poll().await;
        assert_eq!(now.elapsed().as_secs(), 0);

        std::thread::sleep(Duration::from_secs(1));
        tm.unpause();
        t1.poll().await;
        assert_eq!(now.elapsed().as_secs(), 2);
    }
}


================================================
FILE: tinyufo/Cargo.toml
================================================
[package]
name = "TinyUFO"
version = "0.8.0"
authors = ["Yuchen Wu <yuchen@cloudflare.com>"]
edition = "2021"
license = "Apache-2.0"
description = "In-memory cache implementation with TinyLFU as the admission policy and S3-FIFO as the eviction policy"
repository = "https://github.com/cloudflare/pingora"
categories = ["algorithms", "caching"]
keywords = ["cache", "pingora"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
name = "tinyufo"
path = "src/lib.rs"

[dependencies]
ahash = { workspace = true }
flurry = "0.5"
crossbeam-queue = "0"
crossbeam-skiplist = "0"

[dev-dependencies]
rand = "0.9"
lru = { workspace = true }
rand_distr = "0.5"
moka = { version = "0", features = ["sync"] }
dhat = "0"
quick_cache = "0.6"
triomphe = "<=0.1.11" # 0.1.12 requires Rust 1.76

[[bench]]
name = "bench_perf"
harness = false

[[bench]]
name = "bench_hit_ratio"
harness = false

[[bench]]
name = "bench_memory"
harness = false


================================================
FILE: tinyufo/LICENSE
================================================

                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: tinyufo/README.md
================================================
# TinyUFO

TinyUFO is a fast and efficient in-memory cache. It adopts the state-of-the-art [S3-FIFO](https://s3fifo.com/) as well as [TinyLFU](https://arxiv.org/abs/1512.00727) algorithms to achieve high throughput and high hit ratio as the same time.

## Usage

See docs

## Performance Comparison
We compare TinyUFO with [lru](https://crates.io/crates/lru), the most commonly used cache algorithm and [moka](https://crates.io/crates/moka), another [great](https://github.com/rust-lang/crates.io/pull/3999) cache library that implements TinyLFU.

### Hit Ratio

The table below show the cache hit ratio of the compared algorithm under different size of cache, zipf=1.

|cache size / total assets | TinyUFO | TinyUFO - LRU | TinyUFO - moka (TinyLFU) |
| -------- | ------- | ------- | ------ |
| 0.5% | 45.26% | +14.21pp | -0.33pp
| 1% | 52.35% | +13.19pp | +1.69pp
| 5% | 68.89% | +10.14pp | +1.91pp
| 10% | 75.98% | +8.39pp | +1.59pp
| 25% | 85.34% | +5.39pp | +0.95pp

Both TinyUFO and moka greatly improves hit ratio from lru. TinyUFO is the one better in this workload.
[This paper](https://dl.acm.org/doi/pdf/10.1145/3600006.3613147) contains more thorough cache performance
evaluations S3-FIFO, which TinyUFO varies from, against many caching algorithms under a variety of workloads.

### Speed

The table below shows the number of operations performed per second for each cache library. The tests are performed using 8 threads on a x64 Linux desktop.

| Setup | TinyUFO | LRU | moka |
| -------- | ------- | ------- | ------ |
| Pure read | 148.7 million ops | 7.0 million ops | 14.1 million ops
| Mixed read/write | 80.9 million ops | 6.8 million ops | 16.6 million ops

Because of TinyUFO's lock-free design, it greatly outperforms the others.

### Memory overhead

TinyUFO provides a compact mode to trade raw read speed for more memory efficiency. Whether the saving worthy the trade off depends on the actual size and the work load. For small in-memory assets, the saved memory means more things can be cached.

The table below show the memory allocation (in bytes) of the compared cache library under certain workloads to store zero-sized assets.

| cache size | TinyUFO | TinyUFO compact | LRU | moka |
| -------- | ------- | ------- | ------- | ------ |
| 100 | 39,409 | 19,000 | 9,408 | 354,376
| 1000 | 236,053 | 86,352 | 128,512 | 535,888
| 10000 | 2,290,635 | 766,024|  1,075,648 | 2,489,088

================================================
FILE: tinyufo/benches/bench_hit_ratio.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use rand::prelude::*;
use std::num::NonZeroUsize;

const ITEMS: usize = 10_000;
const ITERATIONS: usize = 5_000_000;

fn bench_one(zip_exp: f64, cache_size_percent: f32) {
    print!("{zip_exp:.2}, {cache_size_percent:4}\t\t\t");
    let cache_size = (cache_size_percent * ITEMS as f32).round() as usize;
    let mut lru = lru::LruCache::<u64, ()>::new(NonZeroUsize::new(cache_size).unwrap());
    let moka = moka::sync::Cache::new(cache_size as u64);
    let quick_cache = quick_cache::sync::Cache::new(cache_size);
    let tinyufo = tinyufo::TinyUfo::new(cache_size, cache_size);

    let mut rng = rand::rng();
    let zipf = rand_distr::Zipf::new(ITEMS as f64, zip_exp).unwrap();

    let mut lru_hit = 0;
    let mut moka_hit = 0;
    let mut quick_cache_hit = 0;
    let mut tinyufo_hit = 0;

    for _ in 0..ITERATIONS {
        let key = zipf.sample(&mut rng) as u64;

        if lru.get(&key).is_some() {
            lru_hit += 1;
        } else {
            lru.push(key, ());
        }

        if moka.get(&key).is_some() {
            moka_hit += 1;
        } else {
            moka.insert(key, ());
        }

        if quick_cache.get(&key).is_some() {
            quick_cache_hit += 1;
        } else {
            quick_cache.insert(key, ());
        }

        if tinyufo.get(&key).is_some() {
            tinyufo_hit += 1;
        } else {
            tinyufo.put(key, (), 1);
        }
    }

    print!("{:.2}%\t\t", lru_hit as f32 / ITERATIONS as f32 * 100.0);
    print!("{:.2}%\t\t", moka_hit as f32 / ITERATIONS as f32 * 100.0);
    print!(
        "{:.2}%\t\t",
        quick_cache_hit as f32 / ITERATIONS as f32 * 100.0
    );
    println!("{:.2}%", tinyufo_hit as f32 / ITERATIONS as f32 * 100.0);
}

/*
cargo bench --bench bench_hit_ratio

zipf & cache size               lru             moka            QuickC          TinyUFO
0.90, 0.005                     19.24%          33.43%          32.33%          33.35%
0.90, 0.01                      26.23%          37.86%          38.80%          40.06%
0.90, 0.05                      45.58%          55.13%          55.71%          57.80%
0.90,  0.1                      55.72%          64.15%          64.01%          66.36%
0.90, 0.25                      71.16%          77.12%          75.92%          78.53%
1.00, 0.005                     31.08%          45.68%          44.07%          45.15%
1.00, 0.01                      39.17%          50.80%          50.90%          52.30%
1.00, 0.05                      58.71%          66.92%          67.09%          68.79%
1.00,  0.1                      67.59%          74.28%          74.00%          75.92%
1.00, 0.25                      79.94%          84.35%          83.45%          85.28%
1.05, 0.005                     37.66%          51.78%          50.13%          51.12%
1.05, 0.01                      46.07%          57.13%          57.07%          58.41%
1.05, 0.05                      65.06%          72.37%          72.41%          73.93%
1.05,  0.1                      73.13%          78.97%          78.60%          80.24%
1.05, 0.25                      83.74%          87.41%          86.68%          88.14%
1.10, 0.005                     44.49%          57.84%          56.16%          57.28%
1.10, 0.01                      52.97%          63.19%          62.99%          64.24%
1.10, 0.05                      70.95%          77.24%          77.26%          78.55%
1.10,  0.1                      78.05%          82.86%          82.66%          84.01%
1.10, 0.25                      87.12%          90.10%          89.51%          90.66%
1.50, 0.005                     85.27%          89.92%          89.08%          89.69%
1.50, 0.01                      89.86%          92.77%          92.44%          92.94%
1.50, 0.05                      96.01%          97.08%          96.99%          97.23%
1.50,  0.1                      97.51%          98.15%          98.08%          98.24%
1.50, 0.25                      98.81%          99.09%          99.03%          99.09%
 */

fn main() {
    println!("zipf & cache size\t\tlru\t\tmoka\t\tQuickC\t\tTinyUFO",);
    for zif_exp in [0.9, 1.0, 1.05, 1.1, 1.5] {
        for cache_capacity in [0.005, 0.01, 0.05, 0.1, 0.25] {
            bench_one(zif_exp, cache_capacity);
        }
    }
}


================================================
FILE: tinyufo/benches/bench_memory.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#[global_allocator]
static ALLOC: dhat::Alloc = dhat::Alloc;

use rand::prelude::*;
use std::num::NonZeroUsize;

const ITERATIONS: usize = 5_000_000;

fn bench_lru(zip_exp: f64, items: usize, cache_size_percent: f32) {
    let cache_size = (cache_size_percent * items as f32).round() as usize;
    let mut lru = lru::LruCache::<u64, ()>::new(NonZeroUsize::new(cache_size).unwrap());

    let mut rng = rand::rng();
    let zipf = rand_distr::Zipf::new(items as f64, zip_exp).unwrap();

    for _ in 0..ITERATIONS {
        let key = zipf.sample(&mut rng) as u64;

        if lru.get(&key).is_none() {
            lru.push(key, ());
        }
    }
}

fn bench_moka(zip_exp: f64, items: usize, cache_size_percent: f32) {
    let cache_size = (cache_size_percent * items as f32).round() as usize;
    let moka = moka::sync::Cache::new(cache_size as u64);

    let mut rng = rand::rng();
    let zipf = rand_distr::Zipf::new(items as f64, zip_exp).unwrap();

    for _ in 0..ITERATIONS {
        let key = zipf.sample(&mut rng) as u64;

        if moka.get(&key).is_none() {
            moka.insert(key, ());
        }
    }
}

fn bench_quick_cache(zip_exp: f64, items: usize, cache_size_percent: f32) {
    let cache_size = (cache_size_percent * items as f32).round() as usize;
    let quick_cache = quick_cache::sync::Cache::new(cache_size);

    let mut rng = rand::rng();
    let zipf = rand_distr::Zipf::new(items as f64, zip_exp).unwrap();

    for _ in 0..ITERATIONS {
        let key = zipf.sample(&mut rng) as u64;

        if quick_cache.get(&key).is_none() {
            quick_cache.insert(key, ());
        }
    }
}

fn bench_tinyufo(zip_exp: f64, items: usize, cache_size_percent: f32) {
    let cache_size = (cache_size_percent * items as f32).round() as usize;
    let tinyufo = tinyufo::TinyUfo::new(cache_size, (cache_size as f32 * 1.0) as usize);

    let mut rng = rand::rng();
    let zipf = rand_distr::Zipf::new(items as f64, zip_exp).unwrap();

    for _ in 0..ITERATIONS {
        let key = zipf.sample(&mut rng) as u64;

        if tinyufo.get(&key).is_none() {
            tinyufo.put(key, (), 1);
        }
    }
}

fn bench_tinyufo_compact(zip_exp: f64, items: usize, cache_size_percent: f32) {
    let cache_size = (cache_size_percent * items as f32).round() as usize;
    let tinyufo = tinyufo::TinyUfo::new_compact(cache_size, (cache_size as f32 * 1.0) as usize);

    let mut rng = rand::rng();
    let zipf = rand_distr::Zipf::new(items as f64, zip_exp).unwrap();

    for _ in 0..ITERATIONS {
        let key = zipf.sample(&mut rng) as u64;

        if tinyufo.get(&key).is_none() {
            tinyufo.put(key, (), 1);
        }
    }
}

/*
cargo bench --bench bench_memory

total items 1000, cache size 10%
lru
dhat: At t-gmax: 9,408 bytes in 106 blocks
moka
dhat: At t-gmax: 354,232 bytes in 1,581 blocks
QuickCache
dhat: At t-gmax: 11,840 bytes in 8 blocks
TinyUFO
dhat: At t-gmax: 37,337 bytes in 351 blocks
TinyUFO compat
dhat: At t-gmax: 19,000 bytes in 60 blocks

total items 10000, cache size 10%
lru
dhat: At t-gmax: 128,512 bytes in 1,004 blocks
moka
dhat: At t-gmax: 535,320 bytes in 7,278 blocks
QuickCache
dhat: At t-gmax: 93,000 bytes in 66 blocks
TinyUFO
dhat: At t-gmax: 236,053 bytes in 2,182 blocks
TinyUFO Compact
dhat: At t-gmax: 86,352 bytes in 1,128 blocks

total items 100000, cache size 10%
lru
dhat: At t-gmax: 1,075,648 bytes in 10,004 blocks
moka
dhat: At t-gmax: 2,489,088 bytes in 62,374 blocks
QuickCache
dhat: At t-gmax: 863,752 bytes in 66 blocks
TinyUFO
dhat: At t-gmax: 2,290,635 bytes in 20,467 blocks
TinyUFO
dhat: At t-gmax: 766,024 bytes in 10,421 blocks
*/

fn main() {
    for items in [1000, 10_000, 100_000] {
        println!("\ntotal items {items}, cache size 10%");
        {
            let _profiler = dhat::Profiler::new_heap();
            bench_lru(1.05, items, 0.1);
            println!("lru");
        }

        {
            let _profiler = dhat::Profiler::new_heap();
            bench_moka(1.05, items, 0.1);
            println!("\nmoka");
        }

        {
            let _profiler = dhat::Profiler::new_heap();
            bench_quick_cache(1.05, items, 0.1);
            println!("\nQuickCache");
        }

        {
            let _profiler = dhat::Profiler::new_heap();
            bench_tinyufo(1.05, items, 0.1);
            println!("\nTinyUFO");
        }

        {
            let _profiler = dhat::Profiler::new_heap();
            bench_tinyufo_compact(1.05, items, 0.1);
            println!("\nTinyUFO Compact");
        }
    }
}


================================================
FILE: tinyufo/benches/bench_perf.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use rand::prelude::*;
use std::num::NonZeroUsize;
use std::sync::{Barrier, Mutex};
use std::thread;
use std::time::Instant;

const ITEMS: usize = 100;

const ITERATIONS: usize = 5_000_000;
const THREADS: usize = 8;

/*
cargo bench  --bench bench_perf

Note: the performance number vary a lot on different planform, CPU and CPU arch
Below is from Linux + Ryzen 5 7600 CPU

lru read total 150.423567ms, 30ns avg per operation, 33239472 ops per second
moka read total 462.133322ms, 92ns avg per operation, 10819389 ops per second
quick_cache read total 125.618216ms, 25ns avg per operation, 39803144 ops per second
tinyufo read total 199.007359ms, 39ns avg per operation, 25124698 ops per second
tinyufo compact read total 331.145859ms, 66ns avg per operation, 15099087 ops per second

lru read total 5.402631847s, 1.08µs avg per operation, 925474 ops per second
...
total 6960329 ops per second

moka read total 2.742258211s, 548ns avg per operation, 1823314 ops per second
...
total 14072430 ops per second

quick_cache read total 1.186566627s, 237ns avg per operation, 4213838 ops per second
...
total 33694776 ops per second

tinyufo read total 208.346855ms, 41ns avg per operation, 23998444 ops per second
...
total 148691408 ops per second

tinyufo compact read total 539.403037ms, 107ns avg per operation, 9269507 ops per second
...
total 74130632 ops per second

lru mixed read/write 5.500309876s, 1.1µs avg per operation, 909039 ops per second, 407431 misses
...
total 6846743 ops per second

moka mixed read/write 2.368500882s, 473ns avg per operation, 2111040 ops per second 279324 misses
...
total 16557962 ops per second

quick_cache mixed read/write 838.072588ms, 167ns avg per operation, 5966070 ops per second 315051 misses
...
total 47698472 ops per second

tinyufo mixed read/write 456.134531ms, 91ns avg per operation, 10961678 ops per second, 294977 misses
...
total 80865792 ops per second

tinyufo compact mixed read/write 638.770053ms, 127ns avg per operation, 7827543 ops per second, 294641 misses
...
total 62600844 ops per second
*/

fn main() {
    println!("Note: these performance numbers vary a lot across different CPUs and OSes.");
    // we don't bench eviction here so make the caches large enough to hold all
    let lru = Mutex::new(lru::LruCache::<u64, ()>::unbounded());
    let moka = moka::sync::Cache::new(ITEMS as u64 + 10);
    let quick_cache = quick_cache::sync::Cache::new(ITEMS + 10);
    let tinyufo = tinyufo::TinyUfo::new(ITEMS + 10, 10);
    let tinyufo_compact = tinyufo::TinyUfo::new_compact(ITEMS + 10, 10);

    // populate first, then we bench access/promotion
    for i in 0..ITEMS {
        lru.lock().unwrap().put(i as u64, ());
        moka.insert(i as u64, ());
        quick_cache.insert(i as u64, ());
        tinyufo.put(i as u64, (), 1);
        tinyufo_compact.put(i as u64, (), 1);
    }

    // single thread
    let mut rng = rand::rng();
    let zipf = rand_distr::Zipf::new(ITEMS as f64, 1.03).unwrap();

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        lru.lock().unwrap().get(&(zipf.sample(&mut rng) as u64));
    }
    let elapsed = before.elapsed();
    println!(
        "lru read total {elapsed:?}, {:?} avg per operation, {} ops per second",
        elapsed / ITERATIONS as u32,
        (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
    );

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        moka.get(&(zipf.sample(&mut rng) as u64));
    }
    let elapsed = before.elapsed();
    println!(
        "moka read total {elapsed:?}, {:?} avg per operation, {} ops per second",
        elapsed / ITERATIONS as u32,
        (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
    );

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        quick_cache.get(&(zipf.sample(&mut rng) as u64));
    }
    let elapsed = before.elapsed();
    println!(
        "quick_cache read total {elapsed:?}, {:?} avg per operation, {} ops per second",
        elapsed / ITERATIONS as u32,
        (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
    );

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        tinyufo.get(&(zipf.sample(&mut rng) as u64));
    }
    let elapsed = before.elapsed();
    println!(
        "tinyufo read total {elapsed:?}, {:?} avg per operation, {} ops per second",
        elapsed / ITERATIONS as u32,
        (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
    );

    let before = Instant::now();
    for _ in 0..ITERATIONS {
        tinyufo_compact.get(&(zipf.sample(&mut rng) as u64));
    }
    let elapsed = before.elapsed();
    println!(
        "tinyufo compact read total {elapsed:?}, {:?} avg per operation, {} ops per second",
        elapsed / ITERATIONS as u32,
        (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
    );

    // concurrent
    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(ITEMS as f64, 1.03).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    lru.lock().unwrap().get(&(zipf.sample(&mut rng) as u64));
                }
                let elapsed = before.elapsed();
                println!(
                    "lru read total {elapsed:?}, {:?} avg per operation, {} ops per second",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(ITEMS as f64, 1.03).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    moka.get(&(zipf.sample(&mut rng) as u64));
                }
                let elapsed = before.elapsed();
                println!(
                    "moka read total {elapsed:?}, {:?} avg per operation, {} ops per second",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(ITEMS as f64, 1.03).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    quick_cache.get(&(zipf.sample(&mut rng) as u64));
                }
                let elapsed = before.elapsed();
                println!(
                    "quick_cache read total {elapsed:?}, {:?} avg per operation, {} ops per second",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(ITEMS as f64, 1.03).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    tinyufo.get(&(zipf.sample(&mut rng) as u64));
                }
                let elapsed = before.elapsed();
                println!(
                    "tinyufo read total {elapsed:?}, {:?} avg per operation, {} ops per second",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(ITEMS as f64, 1.03).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    tinyufo_compact.get(&(zipf.sample(&mut rng) as u64));
                }
                let elapsed = before.elapsed();
                println!(
                    "tinyufo compact read total {elapsed:?}, {:?} avg per operation, {} ops per second",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    ///// bench mixed read and write /////
    const CACHE_SIZE: usize = 1000;
    let items: usize = 10000;
    const ZIPF_EXP: f64 = 1.3;

    let lru = Mutex::new(lru::LruCache::<u64, ()>::new(
        NonZeroUsize::new(CACHE_SIZE).unwrap(),
    ));
    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut miss_count = 0;
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(items as f64, ZIPF_EXP).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    let key = zipf.sample(&mut rng) as u64;
                    let mut lru = lru.lock().unwrap();
                    if lru.get(&key).is_none() {
                        lru.put(key, ());
                        miss_count += 1;
                    }
                }
                let elapsed = before.elapsed();
                println!(
                    "lru mixed read/write {elapsed:?}, {:?} avg per operation, {} ops per second, {miss_count} misses",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let moka = moka::sync::Cache::new(CACHE_SIZE as u64);
    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut miss_count = 0;
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(items as f64, ZIPF_EXP).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    let key = zipf.sample(&mut rng) as u64;
                    if moka.get(&key).is_none() {
                        moka.insert(key, ());
                        miss_count += 1;
                    }
                }
                let elapsed = before.elapsed();
                println!(
                    "moka mixed read/write {elapsed:?}, {:?} avg per operation, {} ops per second {miss_count} misses",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let quick_cache = quick_cache::sync::Cache::new(CACHE_SIZE);
    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut miss_count = 0;
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(items as f64, ZIPF_EXP).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    let key = zipf.sample(&mut rng) as u64;
                    if quick_cache.get(&key).is_none() {
                        quick_cache.insert(key, ());
                        miss_count += 1;
                    }
                }
                let elapsed = before.elapsed();
                println!(
                    "quick_cache mixed read/write {elapsed:?}, {:?} avg per operation, {} ops per second {miss_count} misses",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32
                );
            });
        }
    });
    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let tinyufo = tinyufo::TinyUfo::new(CACHE_SIZE, CACHE_SIZE);
    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut miss_count = 0;
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(items as f64, ZIPF_EXP).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    let key = zipf.sample(&mut rng) as u64;
                    if tinyufo.get(&key).is_none() {
                        tinyufo.put(key, (), 1);
                        miss_count +=1;
                    }
                }
                let elapsed = before.elapsed();
                println!(
                    "tinyufo mixed read/write {elapsed:?}, {:?} avg per operation, {} ops per second, {miss_count} misses",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32,
                );
            });
        }
    });

    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );

    let tinyufo_compact = tinyufo::TinyUfo::new(CACHE_SIZE, CACHE_SIZE);
    let wg = Barrier::new(THREADS);
    let before = Instant::now();
    thread::scope(|s| {
        for _ in 0..THREADS {
            s.spawn(|| {
                let mut miss_count = 0;
                let mut rng = rand::rng();
                let zipf = rand_distr::Zipf::new(items as f64, ZIPF_EXP).unwrap();
                wg.wait();
                let before = Instant::now();
                for _ in 0..ITERATIONS {
                    let key = zipf.sample(&mut rng) as u64;
                    if tinyufo_compact.get(&key).is_none() {
                        tinyufo_compact.put(key, (), 1);
                        miss_count +=1;
                    }
                }
                let elapsed = before.elapsed();
                println!(
                    "tinyufo compact mixed read/write {elapsed:?}, {:?} avg per operation, {} ops per second, {miss_count} misses",
                    elapsed / ITERATIONS as u32,
                    (ITERATIONS as f32 / elapsed.as_secs_f32()) as u32,
                );
            });
        }
    });

    let elapsed = before.elapsed();
    println!(
        "total {} ops per second",
        (ITERATIONS as f32 * THREADS as f32 / elapsed.as_secs_f32()) as u32
    );
}


================================================
FILE: tinyufo/src/buckets.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Concurrent storage backend

use super::{Bucket, Key};
use ahash::RandomState;
use crossbeam_skiplist::{map::Entry, SkipMap};
use flurry::HashMap;

/// N-shard skip list. Memory efficient, constant time lookup on average, but a bit slower
/// than hash map
pub struct Compact<T>(Box<[SkipMap<Key, Bucket<T>>]>);

impl<T: Send + 'static> Compact<T> {
    /// Create a new [Compact]
    pub fn new(total_items: usize, items_per_shard: usize) -> Self {
        assert!(items_per_shard > 0);

        let shards = std::cmp::max(total_items / items_per_shard, 1);
        let mut shard_array = vec![];
        for _ in 0..shards {
            shard_array.push(SkipMap::new());
        }
        Self(shard_array.into_boxed_slice())
    }

    pub fn get(&self, key: &Key) -> Option<Entry<'_, Key, Bucket<T>>> {
        let shard = *key as usize % self.0.len();
        self.0[shard].get(key)
    }

    pub fn get_map<V, F: FnOnce(Entry<Key, Bucket<T>>) -> V>(&self, key: &Key, f: F) -> Option<V> {
        let v = self.get(key);
        v.map(f)
    }

    fn insert(&self, key: Key, value: Bucket<T>) -> Option<()> {
        let shard = key as usize % self.0.len();
        let removed = self.0[shard].remove(&key);
        self.0[shard].insert(key, value);
        removed.map(|_| ())
    }

    fn remove(&self, key: &Key) {
        let shard = *key as usize % self.0.len();
        (&self.0)[shard].remove(key);
    }
}

// Concurrent hash map, fast but use more memory
pub struct Fast<T>(HashMap<Key, Bucket<T>, RandomState>);

impl<T: Send + Sync> Fast<T> {
    pub fn new(total_items: usize) -> Self {
        Self(HashMap::with_capacity_and_hasher(
            total_items,
            RandomState::new(),
        ))
    }

    pub fn get_map<V, F: FnOnce(&Bucket<T>) -> V>(&self, key: &Key, f: F) -> Option<V> {
        let pinned = self.0.pin();
        let v = pinned.get(key);
        v.map(f)
    }

    fn insert(&self, key: Key, value: Bucket<T>) -> Option<()> {
        let pinned = self.0.pin();
        pinned.insert(key, value).map(|_| ())
    }

    fn remove(&self, key: &Key) {
        let pinned = self.0.pin();
        pinned.remove(key);
    }
}

pub enum Buckets<T> {
    Fast(Box<Fast<T>>),
    Compact(Compact<T>),
}

impl<T: Send + Sync + 'static> Buckets<T> {
    pub fn new_fast(items: usize) -> Self {
        Self::Fast(Box::new(Fast::new(items)))
    }

    pub fn new_compact(items: usize, items_per_shard: usize) -> Self {
        Self::Compact(Compact::new(items, items_per_shard))
    }

    pub fn insert(&self, key: Key, value: Bucket<T>) -> Option<()> {
        match self {
            Self::Compact(c) => c.insert(key, value),
            Self::Fast(f) => f.insert(key, value),
        }
    }

    pub fn remove(&self, key: &Key) {
        match self {
            Self::Compact(c) => c.remove(key),
            Self::Fast(f) => f.remove(key),
        }
    }

    pub fn get_map<V, F: FnOnce(&Bucket<T>) -> V>(&self, key: &Key, f: F) -> Option<V> {
        match self {
            Self::Compact(c) => c.get_map(key, |v| f(v.value())),
            Self::Fast(c) => c.get_map(key, f),
        }
    }

    #[cfg(test)]
    pub fn get_queue(&self, key: &Key) -> Option<bool> {
        self.get_map(key, |v| v.queue.is_main())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_fast() {
        let fast = Buckets::new_fast(10);

        assert!(fast.get_map(&1, |_| ()).is_none());

        let bucket = Bucket {
            queue: crate::Location::new_small(),
            weight: 1,
            uses: Default::default(),
            data: 1,
        };
        fast.insert(1, bucket);

        assert_eq!(fast.get_map(&1, |v| v.data), Some(1));

        fast.remove(&1);
        assert!(fast.get_map(&1, |_| ()).is_none());
    }

    #[test]
    fn test_compact() {
        let compact = Buckets::new_compact(10, 2);

        assert!(compact.get_map(&1, |_| ()).is_none());

        let bucket = Bucket {
            queue: crate::Location::new_small(),
            weight: 1,
            uses: Default::default(),
            data: 1,
        };
        compact.insert(1, bucket);

        assert_eq!(compact.get_map(&1, |v| v.data), Some(1));

        compact.remove(&1);
        assert!(compact.get_map(&1, |_| ()).is_none());
    }
}


================================================
FILE: tinyufo/src/estimation.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use ahash::RandomState;
use std::hash::Hash;
use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering};

struct Estimator {
    estimator: Box<[(Box<[AtomicU8]>, RandomState)]>,
}

impl Estimator {
    fn optimal_paras(items: usize) -> (usize, usize) {
        use std::cmp::max;
        // derived from https://en.wikipedia.org/wiki/Count%E2%80%93min_sketch
        // width = ceil(e / ε)
        // depth = ceil(ln(1 − δ) / ln(1 / 2))
        let error_range = 1.0 / (items as f64);
        let failure_probability = 1.0 / (items as f64);
        (
            max((std::f64::consts::E / error_range).ceil() as usize, 16),
            max((failure_probability.ln() / 0.5f64.ln()).ceil() as usize, 2),
        )
    }

    fn optimal(items: usize) -> Self {
        let (slots, hashes) = Self::optimal_paras(items);
        Self::new(hashes, slots, RandomState::new)
    }

    fn compact(items: usize) -> Self {
        let (slots, hashes) = Self::optimal_paras(items / 100);
        Self::new(hashes, slots, RandomState::new)
    }

    #[cfg(test)]
    fn seeded(items: usize) -> Self {
        let (slots, hashes) = Self::optimal_paras(items);
        Self::new(hashes, slots, || RandomState::with_seeds(2, 3, 4, 5))
    }

    #[cfg(test)]
    fn seeded_compact(items: usize) -> Self {
        let (slots, hashes) = Self::optimal_paras(items / 100);
        Self::new(hashes, slots, || RandomState::with_seeds(2, 3, 4, 5))
    }

    /// Create a new `Estimator` with the given amount of hashes and columns (slots) using
    /// the given random source.
    pub fn new(hashes: usize, slots: usize, random: impl Fn() -> RandomState) -> Self {
        let mut estimator = Vec::with_capacity(hashes);
        for _ in 0..hashes {
            let mut slot = Vec::with_capacity(slots);
            for _ in 0..slots {
                slot.push(AtomicU8::new(0));
            }
            estimator.push((slot.into_boxed_slice(), random()));
        }

        Estimator {
            estimator: estimator.into_boxed_slice(),
        }
    }

    pub fn incr<T: Hash>(&self, key: T) -> u8 {
        let mut min = u8::MAX;
        for (slot, hasher) in self.estimator.iter() {
            let hash = hasher.hash_one(&key) as usize;
            let counter = &slot[hash % slot.len()];
            let (_current, new) = incr_no_overflow(counter);
            min = std::cmp::min(min, new);
        }
        min
    }

    /// Get the estimated frequency of `key`.
    pub fn get<T: Hash>(&self, key: T) -> u8 {
        let mut min = u8::MAX;
        for (slot, hasher) in self.estimator.iter() {
            let hash = hasher.hash_one(&key) as usize;
            let counter = &slot[hash % slot.len()];
            let current = counter.load(Ordering::Relaxed);
            min = std::cmp::min(min, current);
        }
        min
    }

    /// right shift all values inside this `Estimator`.
    pub fn age(&self, shift: u8) {
        for (slot, _) in self.estimator.iter() {
            for counter in slot.iter() {
                // we don't CAS because the only update between the load and store
                // is fetch_add(1), which should be fine to miss/ignore
                let c = counter.load(Ordering::Relaxed);
                counter.store(c >> shift, Ordering::Relaxed);
            }
        }
    }
}

fn incr_no_overflow(var: &AtomicU8) -> (u8, u8) {
    loop {
        let current = var.load(Ordering::Relaxed);
        if current == u8::MAX {
            return (current, current);
        }
        let new = if current == u8::MAX - 1 {
            u8::MAX
        } else {
            current + 1
        };
        if let Err(new) = var.compare_exchange(current, new, Ordering::Acquire, Ordering::Relaxed) {
            // someone else beat us to it
            if new == u8::MAX {
                // already max
                return (current, new);
            } // else, try again
        } else {
            return (current, new);
        }
    }
}

// bare-minimum TinyLfu with CM-Sketch, no doorkeeper for now
pub(crate) struct TinyLfu {
    estimator: Estimator,
    window_counter: AtomicUsize,
    window_limit: usize,
}

impl TinyLfu {
    pub fn get<T: Hash>(&self, key: T) -> u8 {
        self.estimator.get(key)
    }

    pub fn incr<T: Hash>(&self, key: T) -> u8 {
        let window_size = self.window_counter.fetch_add(1, Ordering::Relaxed);
        // When window_size concurrently increases, only one resets the window and age the estimator.
        // > self.window_limit * 2 is a safety net in case for whatever reason window_size grows
        // out of control
        if window_size == self.window_limit || window_size > self.window_limit * 2 {
            self.window_counter.store(0, Ordering::Relaxed);
            self.estimator.age(1); // right shift 1 bit
        }
        self.estimator.incr(key)
    }

    // because we use 8-bits counters, window size can be 256 * the cache size
    pub fn new(cache_size: usize) -> Self {
        Self {
            estimator: Estimator::optimal(cache_size),
            window_counter: Default::default(),
            // 8x: just a heuristic to balance the memory usage and accuracy
            window_limit: cache_size * 8,
        }
    }

    pub fn new_compact(cache_size: usize) -> Self {
        Self {
            estimator: Estimator::compact(cache_size),
            window_counter: Default::default(),
            // 8x: just a heuristic to balance the memory usage and accuracy
            window_limit: cache_size * 8,
        }
    }

    #[cfg(test)]
    pub fn new_seeded(cache_size: usize) -> Self {
        Self {
            estimator: Estimator::seeded(cache_size),
            window_counter: Default::default(),
            // 8x: just a heuristic to balance the memory usage and accuracy
            window_limit: cache_size * 8,
        }
    }

    #[cfg(test)]
    pub fn new_compact_seeded(cache_size: usize) -> Self {
        Self {
            estimator: Estimator::seeded_compact(cache_size),
            window_counter: Default::default(),
            // 8x: just a heuristic to balance the memory usage and accuracy
            window_limit: cache_size * 8,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cmk_paras() {
        let (slots, hashes) = Estimator::optimal_paras(1_000_000);
        // just smoke check some standard input
        assert_eq!(slots, 2718282);
        assert_eq!(hashes, 20);
    }

    #[test]
    fn test_tiny_lfu() {
        let tiny = TinyLfu::new(1);
        assert_eq!(tiny.get(1), 0);
        assert_eq!(tiny.incr(1), 1);
        assert_eq!(tiny.incr(1), 2);
        assert_eq!(tiny.get(1), 2);

        // Might have hash collisions for the others, need to
        // get() before can assert on the incr() value.
        let two = tiny.get(2);
        assert_eq!(tiny.incr(2), two + 1);
        assert_eq!(tiny.incr(2), two + 2);
        assert_eq!(tiny.get(2), two + 2);

        let three = tiny.get(3);
        assert_eq!(tiny.incr(3), three + 1);
        assert_eq!(tiny.incr(3), three + 2);
        assert_eq!(tiny.incr(3), three + 3);
        assert_eq!(tiny.incr(3), three + 4);

        // 8 incr(), now resets on next incr
        // can only assert they are greater than or equal
        // to the incr() we do per key.

        assert!(tiny.incr(3) >= 3); // had 4, reset to 2, added another.
        assert!(tiny.incr(1) >= 2); // had 2, reset to 1, added another.
        assert!(tiny.incr(2) >= 2); // had 2, reset to 1, added another.
    }
}


================================================
FILE: tinyufo/src/lib.rs
================================================
// Copyright 2026 Cloudflare, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! A In-memory cache implementation with TinyLFU as the admission policy and [S3-FIFO](https://s3fifo.com/) as the eviction policy.
//!
//! TinyUFO improves cache hit ratio noticeably compared to LRU.
//!
//! TinyUFO is lock-free. It is very fast in the systems with a lot concurrent reads and/or writes

use ahash::RandomState;
use crossbeam_queue::SegQueue;
use std::marker::PhantomData;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::{
    AtomicBool, AtomicU8,
    Ordering::{Acquire, Relaxed, SeqCst},
};
mod buckets;
mod estimation;

use buckets::Buckets;
use estimation::TinyLfu;
use std::hash::Hash;

const SMALL: bool = false;
const MAIN: bool = true;

// Indicate which queue an item is located
#[derive(Debug, Default)]
struct Location(AtomicBool);

impl Location {
    fn new_small() -> Self {
        Self(AtomicBool::new(SMALL))
    }

    fn value(&self) -> bool {
        self.0.load(Relaxed)
    }

    fn is_main(&self) -> bool {
        self.value()
    }

    fn move_to_main(&self) {
        self.0.store(true, Relaxed);
    }
}

// We have 8 bits to spare but we still cap at 3. This is to make sure that the main queue
// in the worst case can find something to evict quickly
const USES_CAP: u8 = 3;

#[derive(Debug, Default)]
struct Uses(AtomicU8);

impl Uses {
    pub fn inc_uses(&self) -> u8 {
        loop {
            let uses = self.uses();
            if uses >= USES_CAP {
                return uses;
            }
            if let Err(new) = self.0.compare_exchange(uses, uses + 1, Acquire, Relaxed) {
                // someone else beat us to it
                if new >= USES_CAP {
                    // already above cap
                    return new;
                } // else, try again
            } else {
                return uses + 1;
            }
        }
    }

    // decrease uses, return the previous value
    pub fn decr_uses(&self) -> u8 {
        loop {
            let uses = self.uses();
            if uses == 0 {
                return 0;
            }
            if let Err(new) = self.0.compare_exchange(uses, uses - 1, Acquire, Relaxed) {
                // someone else beat us to it
                if new == 0 {
                    return 0;
                } // else, try again
            } else {
                return uses;
            }
        }
    }

    pub fn uses(&self) -> u8 {
        self.0.load(Relaxed)
    }
}

type Key = u64;
type Weight = u16;

/// The key-value pair returned from cache eviction
#[derive(Clone)]
pub struct KV<T> {
    /// NOTE: that we currently don't store the Actual key in the cache. This returned value
    /// is just the hash of it.
    pub key: Key,
    pub data: T,
    pub weight: Weight,
}

// the data and its metadata
pub struct Bucket<T> {
    uses: Uses,
    queue: Location,
    weight: Weight,
    data: T,
}

const SMALL_QUEUE_PERCENTAGE: f32 = 0.1;

struct FiFoQueues<T> {
    total_weight_limit: usize,

    small: SegQueue<Key>,
    small_weight: AtomicUsize,

    main: SegQueue<Key>,
    main_weight: AtomicUsize,

    // this replaces the ghost queue of S3-FIFO with similar goal: track the evicted assets
    estimator: TinyLfu,

    _t: PhantomData<T>,
}

impl<T: Clone + Send + Sync + 'static> FiFoQueues<T> {
    fn admit(
        &self,
        key: Key,
        data: T,
        weight: u16,
        ignore_lfu: bool,
        buckets: &Buckets<T>,
    ) -> Vec<KV<T>> {
        // Note that we only use TinyLFU during cache admission but not cache read.
        // So effectively we mostly sketch the popularity of less popular assets.
        // In this way the sketch is a bit more accurate on these assets.
        // Also we don't need another separated window cache to address the sparse burst issue as
        // this sketch doesn't favor very popular assets much.
        let new_freq = self.estimator.incr(key);

        assert!(weight > 0);
        let new_bucket = {
            let Some((uses, queue, weight)) = buckets.get_map(&key, |bucket| {
                // the item exists, in case weight changes
                let old_weight = bucket.weight;
                let uses = bucket.uses.inc_uses();

                fn update_atomic(weight: &AtomicUsize, old: u16, new: u16) {
                    if old == new {
                        return;
                    }
                    if old > new {
                        weight.fetch_sub((old - new) as usize, SeqCst);
                    } else {
                        weight.fetch_add((new - old) as usize, SeqCst);
                    }
                }
                let queue = bucket.queue.is_main();
                if queue == MAIN {
                    update_atomic(&self.main_weight, old_weight, weight);
                } else {
                    update_atomic(&self.small_weight, old_weight, weight);
                }
                (uses, queue, weight)
            }) else {
                let mut evicted = self.evict_to_limit(weight, buckets);
                // TODO: figure out the right way to compare frequencies of different weights across
                // many evicted assets. For now TinyLFU is only used when only evicting 1 item.
                let (key, data, weight) = if !ignore_lfu && evicted.len() == 1 {
                    // Apply the admission algorithm of TinyLFU: compare the incoming new item
                    // and the evicted one. The more popular one is admitted to cache
                    let evicted_first = &evicted[0];
                    let evicted_freq = self.estimator.get(evicted_first.key);
                    if evicted_freq > new_freq {
                        // put it back
                        let first = evicted.pop().expect("just check non-empty");
                        // return the put value
                        evicted.push(KV { key, data, weight });
                        (first.key, first.data, first.weight)
                    } else {
                        (key, data, weight)
                    }
                } else {
                    (key, data, weight)
                };

                let bucket = Bucket {
                    queue: Location::new_small(),
                    weight,
                    uses: Default::default(), // 0
                    data,
                };
                let old = buckets.insert(key, bucket);
                if old.is_none() {
                    // Always push key first before updating weight
                    // If doing the other order, another concurrent thread might not
                    // find things to evict
                    self.small.push(key);
                    self.small_weight.fetch_add(weight as usize, SeqCst);
                } // else: two threads are racing adding the item
                  // TODO: compare old.weight and update accordingly
                return evicted;
            };
            Bucket {
                queue: Location(queue.into()),
                weight,
                uses: Uses(uses.into()),
                data,
            }
        };

        // replace the existing one
        buckets.insert(key, new_bucket);

        // NOTE: there is a chance that the item itself is evicted if it happens to be the one selected
        // by the algorithm. We could avoid this by checking if the item is in the returned evicted items,
        // and then add it back. But to keep the code simple we just allow it to happen.
        self.evict_to_limit(0, buckets)
    }

    // the `extra_weight` is to essentially tell the cache to reserve that amount of weight for
    // admission. It is used when calling `evict_to_limit` before admitting the asset itself.
    fn evict_to_limit(&self, extra_weight: Weight, buckets: &Buckets<T>) -> Vec<KV<T>> {
        let mut evicted = if self.total_weight_limit
            < self.small_weight.load(SeqCst) + self.main_weight.load(SeqCst) + extra_weight as usize
        {
            Vec::with_capacity(1)
        } else {
            vec![]
        };
        while self.total_weight_limit
            < self.small_weight.load(SeqCst) + self.main_weight.load(SeqCst) + extra_weight as usize
        {
            if let Some(evicted_item) = self.evict_one(buckets) {
                evicted.push(evicted_item);
            } else {
                break;
            }
        }

        evicted
    }

    fn evict_one(&self, buckets: &Buckets<T>) -> Option<KV<T>> {
        let evict_small = self.small_weight_limit() <= self.small_weight.load(SeqCst);

        if evict_small {
            let evicted = self.evict_one_from_small(buckets);
            // evict_one_from_small could just promote everything to main without evicting any
            // so need to evict_one_from_main if nothing evicted
            if evicted.is_some() {
                return evicted;
            }
        }
        self.evict_one_from_main(buckets)
    }

    fn small_weight_limit(&self) -> usize {
        (self.total_weight_limit as f32 * SMALL_QUEUE_PERCENTAGE).floor() as usize + 1
    }

    fn evict_one_from_small(&self, buckets: &Buckets<T>) -> Option<KV<T>> {
        loop {
            let Some(to_evict) = self.small.pop() else {
                // empty queue, this is caught between another pop() and fetch_sub()
                return None;
            };

            let v = buckets
                .get_map(&to_evict, |bucket| {
                    let weight = bucket.weight;
                    self.small_weight.fetch_sub(weight as usize, SeqCst);

                    if bucket.uses.uses() > 1 {
                        // move to main
                        bucket.queue.move_to_main();
                        self.main.push(to_evict);
                        self.main_weight.fetch_add(weight as usize, SeqCst);
                        // continue until find one to evict
                        None
                    } else {
                        let data = bucket.data.clone();
                        let weight = bucket.weight;
                        buckets.remove(&to_evict);
                        Some(KV {
                            key: to_evict,
                            data,
                            weight,
                        })
                    }
                })
                .flatten();
            if v.is_some() {
                // found the one to evict, break
                return v;
            }
        }
    }

    fn evict_one_from_main(&self, buckets: &Buckets<T>) -> Option<KV<T>> {
        loop {
            let to_evict = self.main.pop()?;

            if let Some(v) = buckets
                .get_map(&to_evict, |bucket| {
                    if bucket.uses.decr_uses() > 0 {
                        // put it back
                        self.main.push(to_evict);
                        // continue the loop
                        None
                    } else {
                        // evict
                        let weight = bucket.weight;
                        self.main_weight.fetch_sub(weight as usize, SeqCst);
                        let data = bucket.data.clone();
                        buckets.remove(&to_evict);
                        Some(KV {
                            key: to_evict,
                            data,
                            weight,
                        })
                    }
                })
                .flatten()
            {
                // found the one to evict, break
                return Some(v);
            }
        }
    }
}

/// [TinyUfo] cache
pub struct TinyUfo<K, T> {
    queues: FiFoQueues<T>,
    buckets: Buckets<T>,
    random_status: RandomState,
    _k: PhantomData<K>,
}
impl<K: Hash, T: Clone + Send + Sync + 'static> TinyUfo<K, T> {
    /// Create a new TinyUfo cache with the given weight limit and the given
    /// size limit of the ghost queue.
    pub fn new(total_weight_limit: usize, estimated_size: usize) -> Self {
        let queues = FiFoQueues {
            small: SegQueue::new(),
            small_weight: 0.into(),
            main: SegQueue::new(),
            main_weight: 0.into(),
            total_weight_limit,
            estimator: TinyLfu::new(estimated_size),
            _t: PhantomData,
        };
        TinyUfo {
            queues,
            buckets: Buckets::new_fast(estimated_size),
            random_status: RandomState::new(),
            _k: PhantomData,
        }
    }

    /// Create a new TinyUfo cache but with more memory efficient data structures.
    /// The trade-off is that the the get() is slower by a constant factor.
    /// The cache hit ratio could be higher as this type of TinyUFO allows to store
    /// more assets with the same memory.
    pub fn new_compact(total_weight_limit: usize, estimated_size: usize) -> Self {
        let queues = FiFoQueues {
            small: SegQueue::new(),
            small_weight: 0.into(),
            main: SegQueue::new(),
            main_weight: 0.into(),
            total_weight_limit,
            estimator: TinyLfu::new_compact(estimated_size),
            _t: PhantomData,
        };
        TinyUfo {
            queues,
            buckets: Buckets::new_compact(estimated_size, 32),
            random_status: RandomState::new(),
            _k: PhantomData,
        }
    }

    // TODO: with_capacity()

    /// Read the given key
    ///
    /// Return Some(T) if the key exists
    pub fn get(&self, key: &K) -> Option<T> {
        let key = self.random_status.hash_one(key);
        self.buckets.get_map(&key, |p| {
            p.uses.inc_uses();
            p.data.clone()
        })
    }

    /// Put the key value to the [TinyUfo]
    ///
    /// Return a list of [KV] of key and `T` that are evicted
    pub fn put(&self, key: K, data: T, weight: Weight) -> Vec<KV<T>> {
        let key = self.random_status.hash_one(&key);
        self.queues.admit(key, data, weight, false, &self.buckets)
    }

    /// Remove the given key from the cache if it exists
    ///
    /// Returns Some(T) if the key was found and removed, None otherwise
    pub fn remove(&self, key: &K) -> Option<T> {
        let key = self.random_status.hash_one(key);

        // Get data and update weights
        let result = self.buckets.get_map(&key, |bucket| {
            let data = bucket.data.clone();
            let weight = bucket.weight;

            // Update weight based on queue location
            if bucket.queue.is_main() {
                self.queues.main_weight.fetch_sub(weight as usize, SeqCst);
            } else {
                self.queues.small_weight.fetch_sub(weight as usize, SeqCst);
            }

            data
        });

        // If we found and processed the item, remove it from buckets
        if result.is_some() {
            self.buckets.remove(&key);
        }

        result
    }
    /// Always put the key value to the [TinyUfo]
    ///
    /// Return a list of [KV] of key and `T` that are evicted
    ///
    /// Similar to [Self::put] but guarantee the assertion of the asset.
    /// In [Self::put], the TinyLFU check may reject putting the current asset if it is less
    /// popular than the once being evicted.
    ///
    /// In some real world use cases, a few reads to the same asset may be pending for the put action
    /// to be finished so that they can read the asset from cache. Neither the above behaviors are ideal
    /// for this use case.
    ///
    /// Compared to [Self::put], the hit ratio when using this function is reduced by about 0.5pp or less in
    /// under zipf workloads.
    pub fn force_put(&self, key: K, data: T, weight: Weight) -> Vec<KV<T>> {
        let key = self.random_status.hash_one(&key);
        self.queues.admit(key, data, weight, true, &self.buckets)
    }

    #[cfg(test)]
    fn peek_queue(&self, key: K) -> Option<bool> {
        let key = self.random_status.hash_one(&key);
        self.buckets.get_queue(&key)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_uses() {
        let uses: Uses = Default::default();
        assert_eq!(uses.uses(), 0);
        uses.inc_uses();
        assert_eq!(uses.uses(), 1);
        for _ in 0..USES_CAP {
            uses.inc_uses();
        }
        assert_eq!(uses.uses(), USES_CAP);

        for _ in 0..USES_CAP + 2 {
            uses.decr_uses();
        }
        assert_eq!(uses.uses(), 0);
    }

    #[test]
    fn test_evict_from_small() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));

        let evicted = cache.put(4, 4, 3);
        assert_eq!(evicted.len(), 2);
        assert_eq!(evicted[0].data, 1);
        assert_eq!(evicted[1].data, 2);

        assert_eq!(cache.peek_queue(1), None);
        assert_eq!(cache.peek_queue(2), None);
        assert_eq!(cache.peek_queue(3), Some(SMALL));
    }

    #[test]
    fn test_evict_from_small_to_main() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        cache.get(&1);
        cache.get(&1); // 1 will be moved to main during next eviction

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));

        let evicted = cache.put(4, 4, 2);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].weight, 2);

        assert_eq!(cache.peek_queue(1), Some(MAIN));
        // either 2, 3, or 4 was evicted. Check evicted for which.
        let mut remaining = vec![2, 3, 4];
        remaining.remove(
            remaining
                .iter()
                .position(|x| *x == evicted[0].data)
                .unwrap(),
        );
        assert_eq!(cache.peek_queue(evicted[0].key), None);
        for k in remaining {
            assert_eq!(cache.peek_queue(k), Some(SMALL));
        }
    }

    #[test]
    fn test_evict_reentry() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));

        let evicted = cache.put(4, 4, 1);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].data, 1);

        assert_eq!(cache.peek_queue(1), None);
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));
        assert_eq!(cache.peek_queue(4), Some(SMALL));

        let evicted = cache.put(1, 1, 1);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].data, 2);

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), None);
        assert_eq!(cache.peek_queue(3), Some(SMALL));
        assert_eq!(cache.peek_queue(4), Some(SMALL));
    }

    #[test]
    fn test_evict_entry_denied() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));

        // trick: put a few times to bump their frequencies
        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);

        let evicted = cache.put(4, 4, 1);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].data, 4); // 4 is returned

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));
        assert_eq!(cache.peek_queue(4), None);
    }

    #[test]
    fn test_force_put() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));

        // trick: put a few times to bump their frequencies
        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);

        // force put will replace 1 with 4 even through 1 is more popular
        let evicted = cache.force_put(4, 4, 1);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].data, 1); // 1 is returned

        assert_eq!(cache.peek_queue(1), None);
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));
        assert_eq!(cache.peek_queue(4), Some(SMALL));
    }

    #[test]
    fn test_evict_from_main() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        // all 3 will qualify to main
        cache.get(&1);
        cache.get(&1);
        cache.get(&2);
        cache.get(&2);
        cache.get(&3);
        cache.get(&3);

        let evicted = cache.put(4, 4, 1);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].data, 1);

        // 1 kicked from main
        assert_eq!(cache.peek_queue(1), None);
        assert_eq!(cache.peek_queue(2), Some(MAIN));
        assert_eq!(cache.peek_queue(3), Some(MAIN));
        assert_eq!(cache.peek_queue(4), Some(SMALL));

        let evicted = cache.put(1, 1, 1);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].data, 4);

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(MAIN));
        assert_eq!(cache.peek_queue(3), Some(MAIN));
        assert_eq!(cache.peek_queue(4), None);
    }

    #[test]
    fn test_evict_from_small_compact() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_compact_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));

        let evicted = cache.put(4, 4, 3);
        assert_eq!(evicted.len(), 2);
        assert_eq!(evicted[0].data, 1);
        assert_eq!(evicted[1].data, 2);

        assert_eq!(cache.peek_queue(1), None);
        assert_eq!(cache.peek_queue(2), None);
        assert_eq!(cache.peek_queue(3), Some(SMALL));
    }

    #[test]
    fn test_evict_from_small_to_main_compact() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);
        cache.queues.estimator = TinyLfu::new_compact_seeded(5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);
        // cache full now

        cache.get(&1);
        cache.get(&1); // 1 will be moved to main during next eviction

        assert_eq!(cache.peek_queue(1), Some(SMALL));
        assert_eq!(cache.peek_queue(2), Some(SMALL));
        assert_eq!(cache.peek_queue(3), Some(SMALL));

        let evicted = cache.put(4, 4, 2);
        assert_eq!(evicted.len(), 1);
        assert_eq!(evicted[0].weight, 2);

        assert_eq!(cache.peek_queue(1), Some(MAIN));
        // either 2, 3, or 4 was evicted. Check evicted for which.
        let mut remaining = vec![2, 3, 4];
        remaining.remove(
            remaining
                .iter()
                .position(|x| *x == evicted[0].data)
                .unwrap(),
        );
        assert_eq!(cache.peek_queue(evicted[0].key), None);
        for k in remaining {
            assert_eq!(cache.peek_queue(k), Some(SMALL));
        }
    }
    #[test]
    fn test_remove() {
        let mut cache = TinyUfo::new(5, 5);
        cache.random_status = RandomState::with_seeds(2, 3, 4, 5);

        cache.put(1, 1, 1);
        cache.put(2, 2, 2);
        cache.put(3, 3, 2);

        assert_eq!(cache.remove(&1), Some(1));
        assert_eq!(cache.remove(&3), Some(3));
        assert_eq!(cache.get(&1), None);
        assert_eq!(cache.get(&3), None);

        // Verify empty keys get evicted when cache fills up
        // Fill cache to trigger eviction
        cache.put(5, 5, 2);
        cache.put(6, 6, 2);
        cache.put(7, 7, 2);

        // The removed items (1, 3) should be naturally evicted now
        // and new items should be in cache
        assert_eq!(cache.get(&1), None);
        assert_eq!(cache.get(&3), None);
        assert!(cache.get(&5).is_some() || cache.get(&6).is_some() || cache.get(&7).is_some());

        // Test weights after eviction cycles
        let total_weight =
            cache.queues.small_weight.load(SeqCst) + cache.queues.main_weight.load(SeqCst);
        assert!(total_weight <= 5); // Should not exceed limit
    }
}