Repository: cloudwego/netpoll
Branch: main
Commit: fcc5e9d814c8
Files: 92
Total size: 356.8 KB

Directory structure:
gitextract_ncyzmgn6/

├── .github/
│   ├── CODEOWNERS
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       └── pr-check.yml
├── .gitignore
├── .golangci.yaml
├── .licenserc.yaml
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── CREDITS
├── LICENSE
├── NOTICE
├── README.md
├── README_CN.md
├── _typos.toml
├── connection.go
├── connection_errors.go
├── connection_errors_test.go
├── connection_impl.go
├── connection_lock.go
├── connection_onevent.go
├── connection_reactor.go
├── connection_test.go
├── docs/
│   ├── guide/
│   │   ├── guide_cn.md
│   │   └── guide_en.md
│   └── reference/
│       ├── design_cn.md
│       ├── design_en.md
│       └── explain.md
├── eventloop.go
├── fd_operator.go
├── fd_operator_cache.go
├── fd_operator_cache_test.go
├── go.mod
├── go.sum
├── internal/
│   └── runner/
│       ├── runner.go
│       └── runner_test.go
├── lint.sh
├── mux/
│   ├── mux_test.go
│   ├── shard_queue.go
│   └── shard_queue_test.go
├── net_dialer.go
├── net_dialer_test.go
├── net_io.go
├── net_listener.go
├── net_listener_test.go
├── net_netfd.go
├── net_netfd_conn.go
├── net_polldesc.go
├── net_polldesc_test.go
├── net_sock.go
├── net_tcpsock.go
├── net_unixsock.go
├── netpoll_config.go
├── netpoll_options.go
├── netpoll_server.go
├── netpoll_unix.go
├── netpoll_unix_test.go
├── netpoll_windows.go
├── nocopy.go
├── nocopy_linkbuffer.go
├── nocopy_linkbuffer_norace.go
├── nocopy_linkbuffer_race.go
├── nocopy_linkbuffer_test.go
├── nocopy_readwriter.go
├── nocopy_readwriter_test.go
├── poll.go
├── poll_default.go
├── poll_default_bsd.go
├── poll_default_bsd_norace.go
├── poll_default_bsd_race.go
├── poll_default_linux.go
├── poll_default_linux_norace.go
├── poll_default_linux_race.go
├── poll_default_linux_test.go
├── poll_loadbalance.go
├── poll_manager.go
├── poll_manager_test.go
├── poll_test.go
├── sys_epoll_linux.go
├── sys_epoll_linux_arm64.go
├── sys_epoll_linux_loong64.go
├── sys_exec.go
├── sys_exec_test.go
├── sys_keepalive_darwin.go
├── sys_keepalive_openbsd.go
├── sys_keepalive_unix.go
├── sys_sendmsg_bsd.go
├── sys_sendmsg_linux.go
├── sys_sockopt_bsd.go
├── sys_sockopt_linux.go
└── test_conns.sh

================================================
FILE CONTENTS
================================================

================================================
FILE: .github/CODEOWNERS
================================================
# For more information, please refer to https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners

*   @cloudwego/netpoll-reviewers @cloudwego/netpoll-approvers @cloudwego/netpoll-maintainers


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots**
If applicable, add screenshots to help explain your problem.

**Desktop (please complete the following information):**
 - OS: [e.g. iOS]
 - Browser [e.g. chrome, safari]
 - Version [e.g. 22]

**Smartphone (please complete the following information):**
 - Device: [e.g. iPhone6]
 - OS: [e.g. iOS8.1]
 - Browser [e.g. stock browser, safari]
 - Version [e.g. 22]

**Additional context**
Add any other context about the problem here.


================================================
FILE: .github/ISSUE_TEMPLATE/feature_request.md
================================================
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''

---

**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]

**Describe the solution you'd like**
A clear and concise description of what you want to happen.

**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.

**Additional context**
Add any other context or screenshots about the feature request here.


================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================


================================================
FILE: .github/workflows/pr-check.yml
================================================
name: Push and Pull Request Check

on: [ push, pull_request ]

jobs:
  compatibility-test:
    strategy:
      matrix:
        go: [ 1.18, 1.24 ]
        os: [ ubuntu-latest, ubuntu-24.04-arm, macos-latest ]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: ${{ matrix.go }}
      - name: Unit Test
        run: go test -timeout=2m -race ./...
      - name: Benchmark
        run: go test -bench=. -benchmem -run=none ./... -benchtime=100ms

  windows-test:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: stable
      - name: Build Test
        run: go vet ./...

  compliant:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Check License Header
        uses: apache/skywalking-eyes/header@v0.4.0
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Check Spell
        uses: crate-ci/typos@v1.13.14

  golangci-lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: stable
          # for self-hosted, the cache path is shared across projects
          # and it works well without the cache of github actions
          # Enable it if we're going to use Github only
          cache: false

      - name: Golangci Lint
        # https://golangci-lint.run/
        uses: golangci/golangci-lint-action@v6
        with:
          version: latest
          only-new-issues: true


================================================
FILE: .gitignore
================================================
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# Dependency directories (remove the comment below to include it)
# vendor/
.idea/

================================================
FILE: .golangci.yaml
================================================
# Options for analysis running.
run:
  timeout: 3m

linters: # https://golangci-lint.run/usage/linters/
  disable-all: true
  enable:
    - gosimple
    - govet
    - ineffassign
    - staticcheck
    - unused
    - unconvert
    - goimports
    - gofumpt

# Refer to https://golangci-lint.run/usage/linters
linters-settings:
  gofumpt:
    # Choose whether to use the extra rules.
    # Default: false
    extra-rules: true
  goimports:
    # Put imports beginning with prefix after 3rd-party packages.
    # It's a comma-separated list of prefixes.
    local-prefixes: github.com/cloudwego/netpoll

issues:
  exclude-use-default: true

================================================
FILE: .licenserc.yaml
================================================
header:
  license:
    spdx-id: Apache-2.0
    copyright-owner: CloudWeGo Authors

  paths:
    - '**/*.go'
    - '**/*.s'

  paths-ignore:
    - 'net_netfd.go'
    - 'net_sock.go'
    - 'net_tcpsock.go'
    - 'net_unixsock.go'
    - 'sys_sockopt_bsd.go'
    - 'sys_sockopt_linux.go'

  comment: on-failure

================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, religion, or sexual identity
and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the
  overall community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or
  advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email
  address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.

## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
conduct@cloudwego.io.
All complaints will be reviewed and investigated promptly and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series
of actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or
permanent ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within
the community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.0, available at
https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.

Community Impact Guidelines were inspired by [Mozilla's code of conduct
enforcement ladder](https://github.com/mozilla/diversity).

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see the FAQ at
https://www.contributor-covenant.org/faq. Translations are available at
https://www.contributor-covenant.org/translations.


================================================
FILE: CONTRIBUTING.md
================================================
# How to Contribute

## Your First Pull Request
We use GitHub for our codebase. You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests).

## Without Semantic Versioning
We keep the stable code in branch `main`, like `golang.org/x`. Development is based on branch `develop`. We promise **Forward Compatibility** by adding a new package directory with the suffix `v2/v3` when the code has breaking changes.

## Branch Organization
We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as our branch organization, also known as [FDD](https://en.wikipedia.org/wiki/Feature-driven_development).

## Bugs
### 1. How to Find Known Issues
We are using [GitHub Issues](https://github.com/cloudwego/netpoll/issues) for our public bugs. We keep a close eye on this and try to make it clear when we have an internal fix in progress. Before filing a new task, try to make sure your problem doesn’t already exist.

### 2. Reporting New Issues
Providing a reduced test case is the recommended way to report issues. It can be placed in:
- The issue itself
- [Golang Playground](https://play.golang.org/)

### 3. Security Bugs
Please do not report security bugs in public issues. Contact us by [Support Email](mailto:conduct@cloudwego.io) instead.

## How to Get in Touch
- [Email](mailto:conduct@cloudwego.io)

## Submit a Pull Request
Before you submit your Pull Request (PR) consider the following guidelines:
1. Search [GitHub](https://github.com/cloudwego/netpoll/pulls) for an open or closed PR that relates to your submission. You don't want to duplicate existing efforts.
2. Be sure that an issue describes the problem you're fixing, or documents the design for the feature you'd like to add. Discussing the design upfront helps to ensure that we're ready to accept your work.
3. [Fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) the cloudwego/netpoll repo.
4. In your forked repository, make your changes in a new git branch:
    ```
    git checkout -b my-fix-branch main
    ```
5. Create your patch, including appropriate test cases.
6. Follow our [Style Guides](#code-style-guides).
7. Commit your changes using a descriptive commit message that follows [AngularJS Git Commit Message Conventions](https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit).
   Adherence to these conventions is necessary because release notes are automatically generated from these messages.
8. Push your branch to GitHub:
    ```
    git push origin my-fix-branch
    ```
9. In GitHub, send a pull request to `netpoll:main`

## Contribution Prerequisites
- Our development environment keeps up with [Go Official](https://golang.org/project/).
- You need to fully check your code with lint tools ([gofmt](https://golang.org/pkg/cmd/gofmt/) and [golangci-lint](https://github.com/golangci/golangci-lint)) before submitting your pull request.
- You should be familiar with [GitHub](https://github.com).
- You may also need to be familiar with [Actions](https://github.com/features/actions) (our default workflow tool).

## Code Style Guides
- [Effective Go](https://golang.org/doc/effective_go)
- [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments)


================================================
FILE: CREDITS
================================================


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: NOTICE
================================================
CloudWeGO
Copyright 2022 CloudWeGO authors.

Go
Copyright (c) 2009 The Go Authors.

================================================
FILE: README.md
================================================
# CloudWeGo-Netpoll

[中文](README_CN.md)

[![Release](https://img.shields.io/github/v/release/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/releases)
[![WebSite](https://img.shields.io/website?up_message=cloudwego&url=https%3A%2F%2Fwww.cloudwego.io%2F)](https://www.cloudwego.io/)
[![License](https://img.shields.io/github/license/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/blob/main/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/cloudwego/netpoll)](https://goreportcard.com/report/github.com/cloudwego/netpoll)
[![OpenIssue](https://img.shields.io/github/issues/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues)
[![ClosedIssue](https://img.shields.io/github/issues-closed/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues?q=is%3Aissue+is%3Aclosed)
![Stars](https://img.shields.io/github/stars/cloudwego/netpoll)
![Forks](https://img.shields.io/github/forks/cloudwego/netpoll)

## Introduction

[Netpoll][Netpoll] is a high-performance non-blocking I/O networking framework
focused on RPC scenarios, developed by [ByteDance][ByteDance].

RPC is usually heavy on processing logic and therefore cannot handle I/O serially. But Go's standard
library [net][net] is designed around blocking I/O APIs, so an RPC framework can
only follow the One Conn One Goroutine design. Under high concurrency, the large number of goroutines
incurs a high cost for context switching. Besides, [net.Conn][net.Conn] has
no API to check whether a connection is alive, so it is difficult to build an efficient connection pool for
an RPC framework, because there may be a large number of failed connections in the pool.

On the other hand, the open source community currently lacks Go network libraries that focus on RPC scenarios. Similar
repositories, such as [evio][evio] and [gnet][gnet], all
focus on scenarios like [Redis][Redis] and [HAProxy][HAProxy].

[Netpoll][Netpoll] was created to solve the above problems. It draws inspiration
from the design of [evio][evio] and [netty][netty], has
excellent [Performance](#performance), and is more suitable for microservice architecture.
[Netpoll][Netpoll] also provides a number of [Features](#features), and it is recommended
as a replacement for [net][net] in some RPC scenarios.

We developed the RPC framework [Kitex][Kitex] and HTTP framework [Hertz][Hertz]
based on [Netpoll][Netpoll], both with industry-leading performance.

[Examples][netpoll-examples] show how to build RPC client and server
using [Netpoll][Netpoll].

For more information, please refer to [Reference](#reference).

## Features

* **Already**
    - [LinkBuffer][LinkBuffer] provides nocopy API for streaming reading and writing
    - [gopool][gopool] provides high-performance goroutine pool
    - [mcache][mcache] provides efficient memory reuse
    - `IsActive` supports checking whether the connection is alive
    - `Dialer` supports building clients
    - `EventLoop` supports building a server
    - TCP, Unix Domain Socket
    - Linux, macOS (operating system)

* **Unsupported**
    - Windows (operating system)

## Performance

Benchmark should meet the requirements of industrial use. 
In the RPC scenario, concurrency and timeout are necessary support items.

We provide the [netpoll-benchmark][netpoll-benchmark] project to track and compare 
the performance of [Netpoll][Netpoll] and other frameworks under different conditions for reference.

For more benchmarks, see [kitex-benchmark][kitex-benchmark] and [hertz-benchmark][hertz-benchmark].

## Reference

* [Official Website](https://www.cloudwego.io)
* [Getting Started](docs/guide/guide_en.md)
* [Design](docs/reference/design_en.md)
* [Why DATA RACE](docs/reference/explain.md)

[Netpoll]: https://github.com/cloudwego/netpoll
[net]: https://github.com/golang/go/tree/master/src/net
[net.Conn]: https://github.com/golang/go/blob/master/src/net/net.go
[evio]: https://github.com/tidwall/evio
[gnet]: https://github.com/panjf2000/gnet
[netty]: https://github.com/netty/netty
[Kitex]: https://github.com/cloudwego/kitex
[Hertz]: https://github.com/cloudwego/hertz

[netpoll-benchmark]: https://github.com/cloudwego/netpoll-benchmark
[kitex-benchmark]: https://github.com/cloudwego/kitex-benchmark
[hertz-benchmark]: https://github.com/cloudwego/hertz-benchmark
[netpoll-examples]:https://github.com/cloudwego/netpoll-examples

[ByteDance]: https://www.bytedance.com
[Redis]: https://redis.io
[HAProxy]: http://www.haproxy.org

[LinkBuffer]: nocopy_linkbuffer.go
[gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool
[mcache]: https://github.com/bytedance/gopkg/tree/develop/lang/mcache


================================================
FILE: README_CN.md
================================================
# CloudWeGo-Netpoll

[English](README.md)

[![Release](https://img.shields.io/github/v/release/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/releases)
[![WebSite](https://img.shields.io/website?up_message=cloudwego&url=https%3A%2F%2Fwww.cloudwego.io%2F)](https://www.cloudwego.io/)
[![License](https://img.shields.io/github/license/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/blob/main/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/cloudwego/netpoll)](https://goreportcard.com/report/github.com/cloudwego/netpoll)
[![OpenIssue](https://img.shields.io/github/issues/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues)
[![ClosedIssue](https://img.shields.io/github/issues-closed/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues?q=is%3Aissue+is%3Aclosed)
![Stars](https://img.shields.io/github/stars/cloudwego/netpoll)
![Forks](https://img.shields.io/github/forks/cloudwego/netpoll)

## 简介

[Netpoll][Netpoll] 是由 [字节跳动][ByteDance] 开发的高性能 NIO(Non-blocking I/O)
网络库，专注于 RPC 场景。

RPC 通常有较重的处理逻辑，因此无法串行处理 I/O。而 Go 的标准库 [net][net] 设计了 BIO(Blocking I/O) 模式的
API，使得 RPC 框架设计上只能为每个连接都分配一个 goroutine。 这在高并发下，会产生大量的
goroutine，大幅增加调度开销。此外，[net.Conn][net.Conn] 没有提供检查连接活性的 API，因此 RPC
框架很难设计出高效的连接池，池中的失效连接无法及时清理。

另一方面，开源社区目前缺少专注于 RPC 方案的 Go 网络库。类似的项目如：[evio][evio]
, [gnet][gnet] 等，均面向 [Redis][Redis], [HAProxy][HAProxy] 这样的场景。

因此 [Netpoll][Netpoll] 应运而生，它借鉴了 [evio][evio]
和 [netty][netty] 的优秀设计，具有出色的 [性能](#性能)，更适用于微服务架构。
同时，[Netpoll][Netpoll] 还提供了一些 [特性](#特性)，推荐在 RPC 设计中替代
[net][net] 。

基于 [Netpoll][Netpoll] 开发的 RPC 框架 [Kitex][Kitex] 和 HTTP 框架 [Hertz][Hertz]，性能均业界领先。

[范例][netpoll-examples] 展示了如何使用 [Netpoll][Netpoll]
构建 RPC Client 和 Server。

更多信息请参阅 [参考](#参考)。

## 特性

* **已经支持**
    - [LinkBuffer][LinkBuffer] 提供可以流式读写的 nocopy API
    - [gopool][gopool] 提供高性能的 goroutine 池
    - [mcache][mcache] 提供高效的内存复用
    - `IsActive` 支持检查连接是否存活
    - `Dialer` 支持构建 client
    - `EventLoop` 支持构建 server
    - 支持 TCP，Unix Domain Socket
    - 支持 Linux，macOS（操作系统）

* **不被支持**
    - Windows（操作系统）

## 性能

性能测试应满足工业级使用要求，在 RPC 场景下，并发请求、等待超时是必要的支持项。

我们提供了 [netpoll-benchmark][netpoll-benchmark] 项目用来长期追踪和比较 [Netpoll][Netpoll] 与其他框架在不同情况下的性能数据以供参考。

更多测试参考 [kitex-benchmark][kitex-benchmark] 和 [hertz-benchmark][hertz-benchmark]

## 参考

* [官方网站](https://www.cloudwego.io)
* [使用文档](docs/guide/guide_cn.md)
* [设计文档](docs/reference/design_cn.md)
* [DATA RACE 说明](docs/reference/explain.md)

[Netpoll]: https://github.com/cloudwego/netpoll
[net]: https://github.com/golang/go/tree/master/src/net
[net.Conn]: https://github.com/golang/go/blob/master/src/net/net.go
[evio]: https://github.com/tidwall/evio
[gnet]: https://github.com/panjf2000/gnet
[netty]: https://github.com/netty/netty
[Kitex]: https://github.com/cloudwego/kitex
[Hertz]: https://github.com/cloudwego/hertz

[netpoll-benchmark]: https://github.com/cloudwego/netpoll-benchmark
[kitex-benchmark]: https://github.com/cloudwego/kitex-benchmark
[hertz-benchmark]: https://github.com/cloudwego/hertz-benchmark
[netpoll-examples]:https://github.com/cloudwego/netpoll-examples

[ByteDance]: https://www.bytedance.com
[Redis]: https://redis.io
[HAProxy]: http://www.haproxy.org

[LinkBuffer]: nocopy_linkbuffer.go
[gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool
[mcache]: https://github.com/bytedance/gopkg/tree/develop/lang/mcache


================================================
FILE: _typos.toml
================================================
# Typo check: https://github.com/crate-ci/typos

[files]
extend-exclude = ["go.mod", "go.sum"]

[default.extend-identifiers]
# *sigh* this just isn't worth the cost of fixing
nd = "nd"
write_datas = "write_datas"


================================================
FILE: connection.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"net"
	"time"
)

// CloseCallback is invoked after the connection has been closed.
// The returned error is unused and is ignored by the caller of the callback.
type CloseCallback func(connection Connection) error

// Connection supports reading and writing simultaneously,
// but does not support simultaneous reading or writing by multiple goroutines.
// It maintains its own input/output buffers and provides a nocopy API for reading and writing.
type Connection interface {
	// Connection extends net.Conn, just for interface compatibility.
	// It's not recommended to use the net.Conn API except for io.Closer.
	net.Conn

	// Reader is the recommended API for nocopy reading.
	// It returns nocopy buffer data, or an error after the timeout set by SetReadTimeout.
	Reader() Reader
	// Writer is the recommended API for nocopy writing.
	// It writes data to the connection in NIO mode,
	// so it returns an error only when the connection isn't active.
	Writer() Writer

	// IsActive reports whether the connection is still active.
	IsActive() bool

	// SetReadTimeout sets how long future Read calls may wait.
	// A zero timeout means Reader will not time out.
	SetReadTimeout(timeout time.Duration) error

	// SetWriteTimeout sets how long future Write calls may wait.
	// A zero timeout means Writer will not time out.
	SetWriteTimeout(timeout time.Duration) error

	// SetIdleTimeout sets the idle timeout of connections by enabling TCP KeepAlive
	// and setting the KeepAlive interval to the given timeout duration.
	// NOTE: Despite its name, this does not track application-level idle time.
	// It configures OS-level TCP KeepAlive to detect dead peers on idle connections.
	// The name is kept for backward compatibility.
	SetIdleTimeout(timeout time.Duration) error

	// SetOnRequest sets or replaces the OnRequest handler of a connection; it cannot be set to nil.
	// Although SetOnRequest avoids data races, it should still be called before any data is transmitted:
	// replacing OnRequest while data is being processed may cause unexpected behavior and results.
	// Generally, the server side should set the OnRequest handler uniformly for every connection via
	// NewEventLoop, which installs it when the connection is initialized.
	// On the client side, if necessary, make sure that OnRequest is set before sending data.
	SetOnRequest(on OnRequest) error

	// AddCloseCallback adds a hangup callback for a connection, which is called when the connection closes.
	// This is very useful for cleaning up idle connections. For instance, callbacks can release the
	// local resources bound to an idle connection when the peer hangs up, so no extra goroutine
	// is needed to poll the connection status.
	AddCloseCallback(callback CloseCallback) error
}

// Conn extends net.Conn with access to the underlying file descriptor.
type Conn interface {
	net.Conn

	// Fd returns the conn's file descriptor, which is used by the poller.
	Fd() (fd int)
}

// Listener extends net.Listener with access to the underlying file descriptor.
type Listener interface {
	net.Listener

	// Fd returns the listener's file descriptor, which is used by the poller.
	Fd() (fd int)
}

// Dialer extends net.Dialer's API, just for interface compatibility.
// DialConnection is recommended, but all functions are practically the same.
// The net.Conn returned by DialTimeout can be directly asserted as Connection if the error is nil.
type Dialer interface {
	// DialConnection dials address on network within timeout and returns a Connection.
	DialConnection(network, address string, timeout time.Duration) (connection Connection, err error)

	// DialTimeout dials address on network within timeout and returns a net.Conn.
	DialTimeout(network, address string, timeout time.Duration) (conn net.Conn, err error)
}


================================================
FILE: connection_errors.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"fmt"
	"net"
	"syscall"
)

// Netpoll-specific error codes. They extend syscall.Errno and are allocated in
// the range 0x100-0x1FF; the low byte (see ErrnoMask) indexes the errnos table.
const (
	// ErrConnClosed reports that the connection was closed while in use.
	ErrConnClosed = syscall.Errno(0x101)
	// ErrReadTimeout reports a read I/O buffer timeout, raised by Connection.Reader.
	ErrReadTimeout = syscall.Errno(0x102)
	// ErrDialTimeout reports a dial timeout.
	ErrDialTimeout = syscall.Errno(0x103)
	// ErrDialNoDeadline reports that the dialer was called without a timeout.
	ErrDialNoDeadline = syscall.Errno(0x104) // TODO: no-deadline support in future
	// ErrUnsupported reports that the called function is not supported.
	ErrUnsupported = syscall.Errno(0x105)
	// ErrEOF is the same as io.EOF.
	ErrEOF = syscall.Errno(0x106)
	// ErrWriteTimeout reports a write I/O buffer timeout, raised by Connection.Writer.
	ErrWriteTimeout = syscall.Errno(0x107)
	// ErrConcurrentAccess reports concurrent access to a connection.
	ErrConcurrentAccess = syscall.Errno(0x108)
)

// ErrnoMask extracts the low byte of a netpoll errno, which is its index in errnos.
const ErrnoMask = 0xFF

// Exception wraps err with a human-readable suffix.
//
// A syscall.Errno is wrapped in an *exception, which implements net.Error and
// supports errors.Is / errors.Unwrap. Any other error is returned unchanged
// when suffix is empty; otherwise it is wrapped with %w so that the message is
// "<err> <suffix>" and the original error stays reachable through errors.Is
// and errors.As. A nil err yields nil.
func Exception(err error, suffix string) error {
	if err == nil {
		return nil
	}
	if no, ok := err.(syscall.Errno); ok {
		return &exception{no: no, suffix: suffix}
	}
	if suffix == "" {
		return err
	}
	return fmt.Errorf("%w %s", err, suffix)
}

var _ net.Error = (*exception)(nil)

// exception pairs an errno with an optional message suffix.
type exception struct {
	no     syscall.Errno
	suffix string
}

// Error returns the netpoll message for codes defined in errnos, falls back to
// the system errno message otherwise, and appends the suffix when present.
func (e *exception) Error() string {
	var s string
	// Only codes in 0x100-0x1FF belong to netpoll. The index is bounds-checked
	// because errnos is shorter than the full 0x00-0xFF range, and unrelated
	// system errnos may happen to have bit 0x100 set.
	if code := int(e.no); code&^ErrnoMask == 0x100 {
		if idx := code & ErrnoMask; idx < len(errnos) {
			s = errnos[idx]
		}
	}
	if s == "" {
		s = e.no.Error()
	}
	if e.suffix != "" {
		s += " " + e.suffix
	}
	return s
}

// Is reports whether e matches target. It matches the exception itself, its
// errno, and whatever syscall.Errno.Is accepts. ErrEOF additionally matches
// ErrConnClosed.
func (e *exception) Is(target error) bool {
	if e == target {
		return true
	}
	if e.no == target {
		return true
	}
	// TODO: ErrConnClosed contains ErrEOF
	if e.no == ErrEOF && target == ErrConnClosed {
		return true
	}
	return e.no.Is(target)
}

// Unwrap returns the underlying errno.
func (e *exception) Unwrap() error {
	return e.no
}

// Timeout reports whether the error is one of the netpoll timeout errors, or
// a system errno that itself reports a timeout.
func (e *exception) Timeout() bool {
	switch e.no {
	case ErrDialTimeout, ErrReadTimeout, ErrWriteTimeout:
		return true
	}
	return e.no.Timeout()
}

// Temporary delegates to the underlying errno.
func (e *exception) Temporary() bool {
	return e.no.Temporary()
}

// errnos holds the messages of the errors defined in netpoll, indexed by the
// low byte of the errno.
var errnos = [...]string{
	ErrnoMask & ErrConnClosed:       "connection has been closed",
	ErrnoMask & ErrReadTimeout:      "connection read timeout",
	ErrnoMask & ErrDialTimeout:      "dial wait timeout",
	ErrnoMask & ErrDialNoDeadline:   "dial no deadline",
	ErrnoMask & ErrUnsupported:      "netpoll does not support",
	ErrnoMask & ErrEOF:              "EOF",
	ErrnoMask & ErrWriteTimeout:     "connection write timeout",
	ErrnoMask & ErrConcurrentAccess: "concurrent connection access",
}


================================================
FILE: connection_errors_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"errors"
	"syscall"
	"testing"
)

// TestErrno checks that Exception keeps errors.Is matching and builds the
// expected message for both a netpoll-defined errno and a system errno.
func TestErrno(t *testing.T) {
	closedErr := Exception(ErrConnClosed, "when next")
	MustTrue(t, errors.Is(closedErr, ErrConnClosed))
	Equal(t, closedErr.Error(), "connection has been closed when next")
	t.Logf("error1=%s", closedErr)

	pipeErr := Exception(syscall.EPIPE, "when flush")
	MustTrue(t, errors.Is(pipeErr, syscall.EPIPE))
	Equal(t, pipeErr.Error(), "broken pipe when flush")
	t.Logf("error2=%s", pipeErr)
}


================================================
FILE: connection_impl.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"sync"
	"sync/atomic"
	"syscall"
	"time"
)

// connState is the state of a connection. It is an alias of int32 so that it
// can be read and updated through sync/atomic.
type connState = int32

// Connection states. connStateNone is the value assigned by connection.init.
const (
	connStateNone         = 0
	connStateConnected    = 1
	connStateDisconnected = 2
)

// connection is the implementation of Connection.
//
// It embeds netFD (fd and addresses), onEvent (callbacks) and locker (CAS
// keychain). The read side uses readTimeout/readDeadline, a reusable
// readTimer, and readTrigger, on which waitRead blocks until it is signalled.
// The write side mirrors it with writeTimeout/writeDeadline, writeTimer and
// writeTrigger. waitReadSize is written atomically by waitRead with the number
// of bytes currently being waited for and reset to 0 when waitRead returns.
type connection struct {
	netFD
	onEvent
	locker
	operator      *FDOperator
	readTimeout   time.Duration
	readDeadline  int64 // UnixNano(). it overwrites readTimeout. 0 if not set.
	readTimer     *time.Timer
	readTrigger   chan error
	waitReadSize  int64
	writeTimeout  time.Duration
	writeDeadline int64 // UnixNano(). it overwrites writeTimeout. 0 if not set.
	writeTimer    *time.Timer
	writeTrigger  chan error
	inputBuffer   *LinkBuffer
	outputBuffer  *LinkBuffer
	outputBarrier *barrier
	maxSize       int       // The maximum size of data between two Release().
	bookSize      int       // The size of data that can be read at once.
	state         connState // Connection state should be changed sequentially.
}

// Compile-time checks that *connection implements Connection, Reader and Writer.
var (
	_ Connection = &connection{}
	_ Reader     = &connection{}
	_ Writer     = &connection{}
)

// Reader implements Connection.
// The connection is its own Reader, so c is returned directly.
func (c *connection) Reader() Reader {
	return c
}

// Writer implements Connection.
// The connection is its own Writer, so c is returned directly.
func (c *connection) Writer() Writer {
	return c
}

// IsActive implements Connection.
// The connection is active as long as the closing key is still held by none,
// i.e. it has not been closed by anyone.
func (c *connection) IsActive() bool {
	return c.isCloseBy(none)
}

// SetIdleTimeout implements Connection.
// A positive timeout is truncated to whole seconds and handed to SetKeepAlive;
// a zero or negative timeout is ignored and nil is returned.
func (c *connection) SetIdleTimeout(timeout time.Duration) error {
	if timeout <= 0 {
		return nil
	}
	seconds := int(timeout.Seconds())
	return c.SetKeepAlive(seconds)
}

// SetReadTimeout implements Connection.
// A non-negative timeout replaces the current read timeout; a negative one
// leaves it untouched. In both cases any read deadline is cleared.
func (c *connection) SetReadTimeout(timeout time.Duration) error {
	if timeout < 0 {
		c.readDeadline = 0
		return nil
	}
	c.readTimeout = timeout
	c.readDeadline = 0
	return nil
}

// SetWriteTimeout implements Connection.
// A non-negative timeout replaces the current write timeout; a negative one
// leaves it untouched. In both cases any write deadline is cleared.
func (c *connection) SetWriteTimeout(timeout time.Duration) error {
	if timeout < 0 {
		c.writeDeadline = 0
		return nil
	}
	c.writeTimeout = timeout
	c.writeDeadline = 0
	return nil
}

// SetDeadline implements net.Conn.SetDeadline.
// It stores t as UnixNano in both the read and the write deadline; a zero t
// clears both deadlines.
func (c *connection) SetDeadline(t time.Time) error {
	var deadline int64
	if !t.IsZero() {
		deadline = t.UnixNano()
	}
	c.readDeadline, c.writeDeadline = deadline, deadline
	return nil
}

// SetReadDeadline implements net.Conn.SetReadDeadline.
// It stores t as UnixNano in the read deadline; a zero t clears it.
func (c *connection) SetReadDeadline(t time.Time) error {
	var deadline int64
	if !t.IsZero() {
		deadline = t.UnixNano()
	}
	c.readDeadline = deadline
	return nil
}

// SetWriteDeadline implements net.Conn.SetWriteDeadline.
// It stores t as UnixNano in the write deadline; a zero t clears it.
func (c *connection) SetWriteDeadline(t time.Time) error {
	var deadline int64
	if !t.IsZero() {
		deadline = t.UnixNano()
	}
	c.writeDeadline = deadline
	return nil
}

// ------------------------------------------ implement zero-copy reader ------------------------------------------

// Next implements Connection.
// It waits until n bytes are readable, then delegates to inputBuffer.Next.
func (c *connection) Next(n int) (p []byte, err error) {
	err = c.waitRead(n)
	if err != nil {
		return nil, err
	}
	return c.inputBuffer.Next(n)
}

// Peek implements Connection.
// It waits until n bytes are readable, then delegates to inputBuffer.Peek.
func (c *connection) Peek(n int) (buf []byte, err error) {
	err = c.waitRead(n)
	if err != nil {
		return nil, err
	}
	return c.inputBuffer.Peek(n)
}

// Skip implements Connection.
// It waits until n bytes are readable, then delegates to inputBuffer.Skip.
func (c *connection) Skip(n int) (err error) {
	err = c.waitRead(n)
	if err == nil {
		err = c.inputBuffer.Skip(n)
	}
	return err
}

// Release implements Connection.
// When the input buffer is empty and the operator can be acquired, it first
// recalculates the tail node size, then releases the input buffer.
func (c *connection) Release() (err error) {
	// Check inputBuffer length first to reduce contention in mux situation.
	// c.operator.do competes with c.inputs/c.inputAck
	if c.inputBuffer.Len() == 0 && c.operator.do() {
		maxSize := c.inputBuffer.calcMaxSize()
		// Cap maxSize at mallocMax to prevent GC pressure.
		if maxSize > mallocMax {
			maxSize = mallocMax
		}

		// c.maxSize only ever grows.
		if maxSize > c.maxSize {
			c.maxSize = maxSize
		}
		// Double check length to reset tail node
		if c.inputBuffer.Len() == 0 {
			c.inputBuffer.resetTail(c.maxSize)
		}
		// Pairs with the successful c.operator.do() above.
		c.operator.done()
	}
	return c.inputBuffer.Release()
}

// Slice implements Connection.
// It waits until n bytes are readable, then delegates to inputBuffer.Slice.
func (c *connection) Slice(n int) (r Reader, err error) {
	if waitErr := c.waitRead(n); waitErr != nil {
		return nil, waitErr
	}
	return c.inputBuffer.Slice(n)
}

// Len implements Connection.
// It returns the current length of the input buffer without waiting.
func (c *connection) Len() (length int) {
	return c.inputBuffer.Len()
}

// Until implements Connection.
// It reads up to and including the first occurrence of delim. If waiting for
// more data fails before delim is found, it returns everything currently in
// the input buffer together with that error.
func (c *connection) Until(delim byte) (line []byte, err error) {
	// n is the number of leading bytes already checked for delim;
	// l is the buffer length observed before each search.
	var n, l int
	for {
		// Wait for at least one byte beyond what has already been checked.
		if err = c.waitRead(n + 1); err != nil {
			// return all the data in the buffer
			line, _ = c.inputBuffer.Next(c.inputBuffer.Len())
			return
		}

		l = c.inputBuffer.Len()
		// presumably searches for delim starting at offset n — TODO confirm against indexByte
		i := c.inputBuffer.indexByte(delim, n)
		if i < 0 {
			n = l // skip all exists bytes
			continue
		}
		// Include the delimiter itself in the returned line.
		return c.Next(i + 1)
	}
}

// ReadString implements Connection.
// It waits until n bytes are readable, then delegates to inputBuffer.ReadString.
func (c *connection) ReadString(n int) (s string, err error) {
	err = c.waitRead(n)
	if err != nil {
		return "", err
	}
	return c.inputBuffer.ReadString(n)
}

// ReadBinary implements Connection.
// It waits until n bytes are readable, then delegates to inputBuffer.ReadBinary.
func (c *connection) ReadBinary(n int) (p []byte, err error) {
	err = c.waitRead(n)
	if err != nil {
		return nil, err
	}
	return c.inputBuffer.ReadBinary(n)
}

// ReadByte implements Connection.
// It waits until one byte is readable, then delegates to inputBuffer.ReadByte.
func (c *connection) ReadByte() (b byte, err error) {
	err = c.waitRead(1)
	if err != nil {
		return 0, err
	}
	return c.inputBuffer.ReadByte()
}

// ------------------------------------------ implement zero-copy writer ------------------------------------------

// Malloc implements Connection.
// It delegates to outputBuffer.Malloc while the connection is active and
// returns ErrConnClosed otherwise.
func (c *connection) Malloc(n int) (buf []byte, err error) {
	if c.IsActive() {
		return c.outputBuffer.Malloc(n)
	}
	return nil, Exception(ErrConnClosed, "when malloc")
}

// MallocLen implements Connection.
// It returns outputBuffer.MallocLen() and does not check whether the
// connection is active.
func (c *connection) MallocLen() (length int) {
	return c.outputBuffer.MallocLen()
}

// Flush sends all malloc'ed data to the peer, so the caller must make sure
// that the allocated bytes have been filled in correctly.
//
// It returns ErrConnClosed if the connection is not active and
// ErrConcurrentAccess if the flushing lock is already held. Otherwise it
// submits the output buffer and writes it out via c.flush, which sends
// directly first and waits for the poller to send whatever remains.
func (c *connection) Flush() error {
	switch {
	case !c.IsActive():
		return Exception(ErrConnClosed, "when flush")
	case !c.lock(flushing):
		// Only attempted once the connection is known to be active.
		return Exception(ErrConcurrentAccess, "when flush")
	}
	defer c.unlock(flushing)

	c.outputBuffer.Flush()
	return c.flush()
}

// MallocAck implements Connection.
// It delegates to outputBuffer.MallocAck while the connection is active and
// returns ErrConnClosed otherwise.
func (c *connection) MallocAck(n int) (err error) {
	if c.IsActive() {
		return c.outputBuffer.MallocAck(n)
	}
	return Exception(ErrConnClosed, "when malloc ack")
}

// Append implements Connection.
// It delegates to outputBuffer.Append while the connection is active and
// returns ErrConnClosed otherwise.
func (c *connection) Append(w Writer) (err error) {
	if c.IsActive() {
		return c.outputBuffer.Append(w)
	}
	return Exception(ErrConnClosed, "when append")
}

// WriteString implements Connection.
// It delegates to outputBuffer.WriteString while the connection is active and
// returns ErrConnClosed otherwise.
func (c *connection) WriteString(s string) (n int, err error) {
	if c.IsActive() {
		return c.outputBuffer.WriteString(s)
	}
	return 0, Exception(ErrConnClosed, "when write string")
}

// WriteBinary implements Connection.
// It delegates to outputBuffer.WriteBinary while the connection is active and
// returns ErrConnClosed otherwise.
func (c *connection) WriteBinary(b []byte) (n int, err error) {
	if c.IsActive() {
		return c.outputBuffer.WriteBinary(b)
	}
	return 0, Exception(ErrConnClosed, "when write binary")
}

// WriteDirect implements Connection.
// It delegates to outputBuffer.WriteDirect while the connection is active and
// returns ErrConnClosed otherwise.
func (c *connection) WriteDirect(p []byte, remainCap int) (err error) {
	if c.IsActive() {
		return c.outputBuffer.WriteDirect(p, remainCap)
	}
	return Exception(ErrConnClosed, "when write direct")
}

// WriteByte implements Connection.
// It delegates to outputBuffer.WriteByte while the connection is active and
// returns ErrConnClosed otherwise.
func (c *connection) WriteByte(b byte) (err error) {
	if c.IsActive() {
		return c.outputBuffer.WriteByte(b)
	}
	return Exception(ErrConnClosed, "when write byte")
}

// ------------------------------------------ implement net.Conn ------------------------------------------

// Read implements net.Conn.
// An empty p returns (0, nil) immediately. Otherwise Read waits until at least
// one byte is readable, returning the error from waitRead if that fails, and
// then copies buffered data into p.
func (c *connection) Read(p []byte) (n int, err error) {
	if len(p) == 0 {
		return 0, nil
	}
	err = c.waitRead(1)
	if err != nil {
		return 0, err
	}
	n = c.inputBuffer.readCopy(p)
	return n, nil
}

// Write implements net.Conn. It copies p into the output buffer and flushes
// it immediately.
//
// It returns ErrConnClosed if the connection is not active and
// ErrConcurrentAccess if the flushing lock is already held. A failure to
// allocate output space is reported to the caller instead of being dropped,
// so a short write is never returned together with a nil error.
func (c *connection) Write(p []byte) (n int, err error) {
	if !c.IsActive() {
		return 0, Exception(ErrConnClosed, "when write")
	}

	if !c.lock(flushing) {
		return 0, Exception(ErrConcurrentAccess, "when write")
	}
	defer c.unlock(flushing)

	dst, err := c.outputBuffer.Malloc(len(p))
	if err != nil {
		return 0, Exception(err, "when write")
	}
	n = copy(dst, p)
	c.outputBuffer.Flush()
	err = c.flush()
	return n, err
}

// Close implements Connection.
// It delegates to c.onClose.
func (c *connection) Close() error {
	return c.onClose()
}

// Detach detaches the connection from poller but doesn't close it.
// It sets the detaching flag and then runs the same onClose path as Close.
// NOTE(review): presumably onClose consults detaching to keep the fd open — confirm.
func (c *connection) Detach() error {
	c.detaching = true
	return c.onClose()
}

// ------------------------------------------ private ------------------------------------------

// barrierPool recycles barrier objects. Each new barrier has its bs and ivs
// slices pre-sized to barriercap entries.
var barrierPool = sync.Pool{
	New: func() interface{} {
		return &barrier{
			bs:  make([][]byte, barriercap),
			ivs: make([]syscall.Iovec, barriercap),
		}
	},
}

// init sets up the connection for conn and then applies opts via onPrepare.
// It creates the triggers and buffers, binds the fd, operator and finalizer,
// switches the fd to non-blocking mode, and enables TCP_NODELAY on TCP
// networks. The error returned is the one from onPrepare.
func (c *connection) init(conn Conn, opts *options) (err error) {
	// Triggers have capacity 1 so that signalling never blocks the sender.
	c.readTrigger = make(chan error, 1)
	c.writeTrigger = make(chan error, 1)
	c.bookSize = defaultLinkBufferSize
	c.maxSize = defaultLinkBufferSize
	c.inputBuffer = NewLinkBuffer(defaultLinkBufferSize)
	c.outputBuffer = NewLinkBuffer()
	c.outputBarrier = barrierPool.Get().(*barrier)
	c.state = connStateNone

	c.initNetFD(conn) // conn must be *netFD{}
	c.initFDOperator()
	c.initFinalizer()

	syscall.SetNonblock(c.fd, true)
	// TCP_NODELAY is enabled by default on TCP networks.
	if c.network == "tcp" || c.network == "tcp4" || c.network == "tcp6" {
		setTCPNoDelay(c.fd, true)
	}

	// The connection is initialized; prepare the options.
	return c.onPrepare(opts)
}

// initNetFD fills the embedded netFD from conn. A *netFD is copied as a whole;
// any other Conn contributes only its fd, local address and remote address.
func (c *connection) initNetFD(conn Conn) {
	nfd, isNetFD := conn.(*netFD)
	if !isNetFD {
		c.netFD = netFD{
			fd:         conn.Fd(),
			localAddr:  conn.LocalAddr(),
			remoteAddr: conn.RemoteAddr(),
		}
		return
	}
	c.netFD = *nfd
}

// initFDOperator allocates an FDOperator from a poll picked by pollmanager and
// wires it to this connection: hangups go to onHup, and input/output go
// through the inputs/inputAck and outputs/outputAck pairs. OnRead and OnWrite
// are left nil.
func (c *connection) initFDOperator() {
	op := pollmanager.Pick().Alloc()
	op.FD = c.fd
	op.OnRead = nil
	op.OnWrite = nil
	op.OnHup = c.onHup
	op.Inputs = c.inputs
	op.InputAck = c.inputAck
	op.Outputs = c.outputs
	op.OutputAck = c.outputAck
	c.operator = op
}

// initFinalizer registers the close callback that releases the connection's
// resources: it stops flushing, frees the operator, closes the netFD (logging
// a failure instead of returning it) and closes the buffers.
func (c *connection) initFinalizer() {
	c.AddCloseCallback(func(Connection) error {
		c.stop(flushing)
		c.operator.Free()
		if closeErr := c.netFD.Close(); closeErr != nil {
			logger.Printf("NETPOLL: netFD close failed: %v", closeErr)
		}
		c.closeBuffer()
		return nil
	})
}

// triggerRead signals a reader blocked on readTrigger with err.
// The send is non-blocking: if readTrigger cannot accept the value right now
// (its buffer is full), the signal is dropped.
func (c *connection) triggerRead(err error) {
	select {
	case c.readTrigger <- err:
	default:
	}
}

// triggerWrite signals a writer blocked on writeTrigger with err.
// The send is non-blocking: if writeTrigger cannot accept the value right now
// (its buffer is full), the signal is dropped.
func (c *connection) triggerWrite(err error) {
	select {
	case c.writeTrigger <- err:
	default:
	}
}

// waitRead blocks until the input buffer holds at least n bytes.
//
// A positive readDeadline takes precedence over readTimeout; with neither set
// the wait is unbounded. It returns ErrReadTimeout if the deadline has already
// passed, ErrEOF if the connection was closed by the poller, ErrConnClosed if
// it was closed by the user, or any non-nil error received on readTrigger.
func (c *connection) waitRead(n int) (err error) {
	// Fast path: enough data is already buffered.
	if n <= c.inputBuffer.Len() {
		return nil
	}
	// Publish the wanted size while waiting; reset it on return.
	atomic.StoreInt64(&c.waitReadSize, int64(n))
	defer atomic.StoreInt64(&c.waitReadSize, 0)
	if dl := c.readDeadline; dl > 0 {
		// Convert the absolute deadline into a remaining duration.
		timeout := time.Duration(dl - time.Now().UnixNano())
		if timeout <= 0 {
			return Exception(ErrReadTimeout, c.remoteAddr.String())
		}
		return c.waitReadWithTimeout(n, timeout)
	} else if c.readTimeout > 0 {
		return c.waitReadWithTimeout(n, c.readTimeout)
	}
	// wait full n
	for c.inputBuffer.Len() < n {
		switch c.status(closing) {
		case poller:
			return Exception(ErrEOF, "wait read")
		case user:
			return Exception(ErrConnClosed, "wait read")
		default:
			// Block until signalled, then re-check the buffer length.
			err = <-c.readTrigger
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// waitReadWithTimeout blocks until the input buffer holds at least n bytes or
// timeout elapses.
//
// It returns ErrReadTimeout on timeout (unless enough data has arrived in the
// meantime), ErrEOF if the connection was closed by the poller, ErrConnClosed
// if it was closed by the user, or any non-nil error received on readTrigger.
// readTimer is created on first use and reused afterwards.
func (c *connection) waitReadWithTimeout(n int, timeout time.Duration) (err error) {
	if c.readTimer == nil {
		c.readTimer = time.NewTimer(timeout)
	} else {
		c.readTimer.Reset(timeout)
	}

	for c.inputBuffer.Len() < n {
		switch c.status(closing) {
		case poller:
			// cannot return directly, stop timer first!
			err = Exception(ErrEOF, "wait read")
			goto RET
		case user:
			// cannot return directly, stop timer first!
			err = Exception(ErrConnClosed, "wait read")
			goto RET
		default:
			select {
			case <-c.readTimer.C:
				// The timer has fired and its channel was just drained, so
				// returning directly (without going through RET) is safe here.
				// double check if there is enough data to be read
				if c.inputBuffer.Len() >= n {
					return nil
				}
				return Exception(ErrReadTimeout, c.remoteAddr.String())
			case err = <-c.readTrigger:
				if err != nil {
					goto RET
				}
				continue
			}
		}
	}
RET:
	// Stop the timer so it can be reused; if it has already fired, drain
	// timer.C so that no stale tick is left for the next wait.
	if !c.readTimer.Stop() {
		<-c.readTimer.C
	}
	return err
}

// flush writes the output buffer to the fd.
//
// It first tries to send directly with sendmsg. If data remains afterwards, it
// switches the operator with PollR2RW and blocks in waitFlush. Errors other
// than EAGAIN are wrapped with "when flush".
func (c *connection) flush() error {
	if c.outputBuffer.IsEmpty() {
		return nil
	}
	bs := c.outputBuffer.GetBytes(c.outputBarrier.bs)
	n, err := sendmsg(c.fd, bs, c.outputBarrier.ivs, false)
	// EAGAIN is not a failure here; the remaining data is handled below.
	if err != nil && err != syscall.EAGAIN {
		return Exception(err, "when flush")
	}
	if n > 0 {
		// Drop the n bytes that were sent and release them.
		err = c.outputBuffer.Skip(n)
		c.outputBuffer.Release()
		if err != nil {
			return Exception(err, "when flush")
		}
	}
	// return if write all buffer.
	if c.outputBuffer.IsEmpty() {
		return nil
	}
	// presumably PollR2RW adds write interest to the poller registration — TODO confirm
	err = c.operator.Control(PollR2RW)
	if err != nil {
		return Exception(err, "when flush")
	}

	return c.waitFlush()
}

// waitFlush blocks until writeTrigger is signalled or the write times out.
//
// A positive writeDeadline takes precedence over writeTimeout; with a zero
// timeout the wait is unbounded. It returns the value received from
// writeTrigger, or ErrWriteTimeout if the deadline has already passed or the
// timer fires first.
func (c *connection) waitFlush() (err error) {
	timeout := c.writeTimeout
	if dl := c.writeDeadline; dl > 0 {
		// Convert the absolute deadline into a remaining duration.
		timeout = time.Duration(dl - time.Now().UnixNano())
		if timeout <= 0 {
			return Exception(ErrWriteTimeout, c.remoteAddr.String())
		}
	}
	if timeout == 0 {
		return <-c.writeTrigger
	}

	// set write timeout
	if c.writeTimer == nil {
		c.writeTimer = time.NewTimer(timeout)
	} else {
		c.writeTimer.Reset(timeout)
	}

	select {
	case err = <-c.writeTrigger:
		if !c.writeTimer.Stop() { // clean timer
			<-c.writeTimer.C
		}
		return err
	case <-c.writeTimer.C:
		select {
		// try fetch writeTrigger if both cases fires
		case err = <-c.writeTrigger:
			return err
		default:
		}
		// if timeout, remove write event from poller
		// we cannot flush it again, since we don't know if the poller is still processing outputBuffer
		c.operator.Control(PollRW2R)
		return Exception(ErrWriteTimeout, c.remoteAddr.String())
	}
}

// getState atomically loads the connection state.
func (c *connection) getState() connState {
	return atomic.LoadInt32(&c.state)
}

// setState atomically stores newState as the connection state.
func (c *connection) setState(newState connState) {
	atomic.StoreInt32(&c.state, newState)
}

// changeState atomically moves the connection state from `from` to `to`.
// It reports whether the swap happened, i.e. whether the state was `from`.
func (c *connection) changeState(from, to connState) bool {
	return atomic.CompareAndSwapInt32(&c.state, from, to)
}


================================================
FILE: connection_lock.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"runtime"
	"sync/atomic"
)

// who identifies the party that closed a connection.
type who = int32

const (
	none who = iota
	user
	poller
)

// key selects one slot of a locker's keychain.
type key int32

/* State Diagram
+--------------+         +--------------+
|  processing  |-------->|   flushing   |
+-------+------+         +-------+------+
        |
        |                +--------------+
        +--------------->|   closing    |
                         +--------------+

- "processing" locks onRequest handler, and doesn't exist in dialer.
- "flushing" locks outputBuffer
- "closing" should wait for flushing finished and call the closeCallback after that.
*/

const (
	closing key = iota
	connecting
	processing
	flushing
	// total must be at the bottom.
	total
)

// locker is a set of independent CAS locks, one per key.
type locker struct {
	// keychain used for lock/unlock/stop operation by who.
	// 0 means unlock, 1 means locked, 2 means stop.
	keychain [total]int32
}

// closeBy claims the closing slot for w. It succeeds only for the first
// caller, while the slot still holds 0.
func (l *locker) closeBy(w who) (success bool) {
	slot := &l.keychain[closing]
	return atomic.CompareAndSwapInt32(slot, 0, w)
}

// isCloseBy reports whether the closing slot currently holds w.
func (l *locker) isCloseBy(w who) (yes bool) {
	owner := atomic.LoadInt32(&l.keychain[closing])
	return owner == w
}

// status returns the current value of slot k.
func (l *locker) status(k key) int32 {
	slot := &l.keychain[k]
	return atomic.LoadInt32(slot)
}

// force unconditionally stores v into slot k.
func (l *locker) force(k key, v int32) {
	slot := &l.keychain[k]
	atomic.StoreInt32(slot, v)
}

// lock tries to move slot k from unlocked (0) to locked (1) without blocking.
func (l *locker) lock(k key) (success bool) {
	slot := &l.keychain[k]
	return atomic.CompareAndSwapInt32(slot, 0, 1)
}

// unlock resets slot k to unlocked (0).
func (l *locker) unlock(k key) {
	slot := &l.keychain[k]
	atomic.StoreInt32(slot, 0)
}

// stop moves slot k to the stopped state (2). It yields the processor until
// the slot is unlocked and can be claimed, or until it is already stopped.
func (l *locker) stop(k key) {
	slot := &l.keychain[k]
	for {
		if atomic.CompareAndSwapInt32(slot, 0, 2) {
			return
		}
		if atomic.LoadInt32(slot) == 2 {
			return
		}
		runtime.Gosched()
	}
}

// isUnlock reports whether slot k is unlocked (0).
func (l *locker) isUnlock(k key) bool {
	slot := &l.keychain[k]
	return atomic.LoadInt32(slot) == 0
}


================================================
FILE: connection_onevent.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"sync/atomic"

	"github.com/cloudwego/netpoll/internal/runner"
)

// ------------------------------------ implement OnPrepare, OnRequest, CloseCallback ------------------------------------

// gracefulExit is the pair of operations needed to shut something down politely:
// ask whether it is idle, and close it. connection provides isIdle in this file.
type gracefulExit interface {
	// isIdle reports whether the implementer is currently idle.
	isIdle() (yes bool)
	// Close closes the implementer and returns any error from doing so.
	Close() (err error)
}

// onEvent is the collection of event processing.
// OnPrepare, OnRequest, CloseCallback share the lock processing,
// which is a CAS lock and can only be cleared by OnRequest.
//
// Fields:
//   - ctx is the context handed to the user callbacks; it is set in onPrepare
//     and replaced by the value returned from OnConnect.
//   - onConnectCallback, onDisconnectCallback and onRequestCallback hold the
//     OnConnect, OnDisconnect and OnRequest functions; an empty Value means
//     the callback was never set.
//   - closeCallbacks holds the most recently added *callbackNode, which links
//     back to the earlier ones.
type onEvent struct {
	ctx                  context.Context
	onConnectCallback    atomic.Value
	onDisconnectCallback atomic.Value
	onRequestCallback    atomic.Value
	closeCallbacks       atomic.Value // value is latest *callbackNode
}

// callbackNode is one entry of the singly linked list of close callbacks.
// fn is the callback itself and pre points to the node that was added before
// this one (nil for the first node), so walking the list visits callbacks
// from newest to oldest.
type callbackNode struct {
	fn  CloseCallback
	pre *callbackNode
}

// SetOnConnect installs the OnConnect callback.
// A nil callback is ignored and leaves any previously installed one in place.
// The returned error is always nil.
func (c *connection) SetOnConnect(onConnect OnConnect) error {
	if onConnect == nil {
		return nil
	}
	c.onConnectCallback.Store(onConnect)
	return nil
}

// SetOnDisconnect installs the OnDisconnect callback.
// A nil callback is ignored and leaves any previously installed one in place.
// The returned error is always nil.
func (c *connection) SetOnDisconnect(onDisconnect OnDisconnect) error {
	if onDisconnect == nil {
		return nil
	}
	c.onDisconnectCallback.Store(onDisconnect)
	return nil
}

// SetOnRequest installs the OnRequest handler.
// A nil handler is ignored. If input has already been buffered by the time the
// handler is installed, request processing is kicked off immediately so that
// data is not left waiting for the next read event.
// The returned error is always nil.
func (c *connection) SetOnRequest(onRequest OnRequest) error {
	if onRequest != nil {
		c.onRequestCallback.Store(onRequest)
		if pending := !c.inputBuffer.IsEmpty(); pending {
			c.onRequest()
		}
	}
	return nil
}

// AddCloseCallback registers callback to be run when the connection is closed.
// A nil callback is ignored. Each new callback is linked in front of the
// previously registered ones, so closeCallback runs them newest first.
// The returned error is always nil.
//
// NOTE(review): the Load and the Store are two separate atomic steps, so two
// concurrent callers could overwrite each other's node — confirm callers serialize.
func (c *connection) AddCloseCallback(callback CloseCallback) error {
	if callback == nil {
		return nil
	}
	node := &callbackNode{fn: callback}
	if prev, ok := c.closeCallbacks.Load().(*callbackNode); ok {
		node.pre = prev
	}
	c.closeCallbacks.Store(node)
	return nil
}

// onPrepare supports close connection, but not read/write data.
// connection will be registered by this call after preparing.
//
// When opts is non-nil the callbacks and timeouts are installed first and the
// user's onPrepare hook runs last, supplying c.ctx. A nil c.ctx is replaced by
// context.Background(). The connection is registered only if it is still
// active afterwards; the result of register is returned, or nil when
// registration is skipped.
func (c *connection) onPrepare(opts *options) (err error) {
	if opts != nil {
		// The return values of the setters below are discarded.
		c.SetOnConnect(opts.onConnect)
		c.SetOnDisconnect(opts.onDisconnect)
		c.SetOnRequest(opts.onRequest)
		c.SetReadTimeout(opts.readTimeout)
		c.SetWriteTimeout(opts.writeTimeout)
		c.SetIdleTimeout(opts.idleTimeout)

		// calling prepare first and then register.
		if opts.onPrepare != nil {
			c.ctx = opts.onPrepare(c)
		}
	}

	// Callbacks always receive a non-nil context.
	if c.ctx == nil {
		c.ctx = context.Background()
	}
	// prepare may close the connection.
	if c.IsActive() {
		return c.register()
	}
	return nil
}

// onConnect is responsible for executing onRequest if there is new data coming after onConnect callback finished.
//
// Without an OnConnect callback it only moves the state from connStateNone to
// connStateConnected. Otherwise it takes the connecting lock and hands the
// OnConnect and OnRequest callbacks to onProcess, whose task releases the
// connecting lock after the OnConnect callback has run.
//
// NOTE(review): if onProcess cannot take the processing lock it returns without
// starting a task, and nothing here releases the connecting lock — confirm that
// this path cannot occur.
func (c *connection) onConnect() {
	onConnect, _ := c.onConnectCallback.Load().(OnConnect)
	if onConnect == nil {
		c.changeState(connStateNone, connStateConnected)
		return
	}
	if !c.lock(connecting) {
		// it never happens because onDisconnect will not lock connecting if c.connected == 0
		return
	}
	// onRequest may be nil; onProcess handles that case.
	onRequest, _ := c.onRequestCallback.Load().(OnRequest)
	c.onProcess(onConnect, onRequest)
}

// onDisconnect runs the user's OnDisconnect callback at most once.
// when onDisconnect called, c.IsActive() must return false
//
// There are three cases:
//   - no OnDisconnect callback: nothing to do;
//   - no OnConnect callback: the state is forced to connStateDisconnected and
//     the callback is invoked directly;
//   - both callbacks set: the callback is invoked here only if OnConnect has
//     already finished; otherwise the onProcess task that is running OnConnect
//     invokes it.
func (c *connection) onDisconnect() {
	onDisconnect, _ := c.onDisconnectCallback.Load().(OnDisconnect)
	if onDisconnect == nil {
		return
	}
	onConnect, _ := c.onConnectCallback.Load().(OnConnect)
	if onConnect == nil {
		// no need lock if onConnect is nil
		// it's ok to force set state to disconnected since onConnect is nil
		c.setState(connStateDisconnected)
		onDisconnect(c.ctx, c)
		return
	}
	// check if OnConnect finished when onConnect != nil && onDisconnect != nil
	if c.getState() != connStateNone && c.lock(connecting) { // means OnConnect already finished
		// protect onDisconnect run once
		// if CAS return false, means OnConnect already helps to run onDisconnect
		if c.changeState(connStateConnected, connStateDisconnected) {
			onDisconnect(c.ctx, c)
		}
		c.unlock(connecting)
		return
	}
	// OnConnect is not finished yet, return and let onConnect helps to call onDisconnect
}

// onRequest schedules the user's OnRequest handler for newly arrived input.
// The returned needTrigger tells the caller whether it still has to signal a
// waiting reader itself:
//   - true when no OnRequest handler is installed, or when a processing task
//     is already running so no new one was started;
//   - false when a new processing task was started, or when OnConnect has not
//     finished yet (onConnect will run the handler afterwards).
func (c *connection) onRequest() (needTrigger bool) {
	handler, installed := c.onRequestCallback.Load().(OnRequest)
	if !installed {
		return true
	}
	// OnConnect must complete before any request is handled; it takes over calling the handler.
	connectPending := c.getState() == connStateNone && c.onConnectCallback.Load() != nil
	if connectPending {
		return false
	}
	// If no task could be started, the caller should trigger the read instead.
	started := c.onProcess(nil, handler)
	return !started
}

// onProcess is responsible for executing the onConnect/onRequest function serially,
// and make sure the connection has been closed correctly if user call c.Close() in onConnect/onRequest function.
//
// It first takes the processing lock; if the lock is already held a task is
// running and onProcess returns false without doing anything. Otherwise it
// builds a task closure, submits it through runner.RunTask and returns true.
// Either argument may be nil, in which case that callback is skipped.
//
// The task releases the processing lock itself on the normal exit path. When
// it runs closeCallback because the connection was closed, the lock is left held.
func (c *connection) onProcess(onConnect OnConnect, onRequest OnRequest) (processed bool) {
	// task already exists
	if !c.lock(processing) {
		return false
	}

	task := func() {
		// panicked stays true unless the task reaches one of its regular exits,
		// which lets the deferred function tell a panic from a normal return.
		panicked := true
		defer func() {
			if !panicked {
				return
			}
			// cannot use recover() here, since we don't want to break the panic stack
			c.unlock(processing)
			if c.IsActive() {
				c.Close()
			} else {
				c.closeCallback(false, false)
			}
		}()
		// trigger onConnect first
		if onConnect != nil && c.changeState(connStateNone, connStateConnected) {
			c.ctx = onConnect(c.ctx, c)
			if !c.IsActive() && c.changeState(connStateConnected, connStateDisconnected) {
				// since we hold connecting lock, so we should help to call onDisconnect here
				onDisconnect, _ := c.onDisconnectCallback.Load().(OnDisconnect)
				if onDisconnect != nil {
					onDisconnect(c.ctx, c)
				}
			}
			c.unlock(connecting)
		}
	START:
		// The `onRequest` must be executed at least once if conn have any readable data,
		// which is in order to cover the `send & close by peer` case.
		if onRequest != nil && c.Reader().Len() > 0 {
			_ = onRequest(c.ctx, c)
		}
		// The processing loop must ensure that the connection meets `IsActive`.
		// `onRequest` must either eventually read all the input data or actively Close the connection,
		// otherwise the goroutine will fall into a dead loop.
		var closedBy who
		for {
			closedBy = c.status(closing)
			// close by user or not processable
			if closedBy == user || onRequest == nil || c.Reader().Len() == 0 {
				break
			}
			_ = onRequest(c.ctx, c)
		}
		// handling callback if connection has been closed.
		if closedBy != none {
			//  if closed by user when processing, it "may" needs detach
			needDetach := closedBy == user
			// Here is a corner case that operator will be detached twice:
			//   If server closed the connection(client OnHup will detach op first and closeBy=poller),
			//   and then client's OnRequest function also closed the connection(closeBy=user).
			// But operator already prevent that detach twice will not cause any problem
			c.closeCallback(false, needDetach)
			panicked = false
			return
		}
		c.unlock(processing)
		// Note: Poller's closeCallback call will try to get processing lock failed but here already near to unlock processing.
		//       So here we need to check connection state again, to avoid connection leak
		// double check close state
		if c.status(closing) != 0 && c.lock(processing) {
			// poller will get the processing lock failed, here help poller do closeCallback
			// fd must already detach by poller
			c.closeCallback(false, false)
			panicked = false
			return
		}
		// double check is processable
		// Data may have arrived between the loop above and the unlock; if the
		// lock can be re-taken, this task handles that data as well.
		if onRequest != nil && c.Reader().Len() > 0 && c.lock(processing) {
			goto START
		}
		// task exits
		panicked = false
	} // end of task closure func

	// add new task
	runner.RunTask(c.ctx, task)
	return true
}

// closeCallback finishes closing the connection: it optionally detaches the
// operator from its poller and then runs every registered close callback,
// newest first.
//
// needLock asks closeCallback to take the processing lock first; if the lock is
// already held it returns immediately and leaves the work to the running task,
// which guarantees closeCallback and onRequest never execute concurrently.
// needDetach asks for a PollDetach, which is skipped when the operator was
// never registered (for instance when Close is called during OnPrepare).
// A detach failure is only logged. The returned error is always nil.
func (c *connection) closeCallback(needLock, needDetach bool) (err error) {
	if needLock {
		if acquired := c.lock(processing); !acquired {
			return nil
		}
	}
	// PollDetach only happens when the user calls conn.Close() or the poller detects an error.
	if needDetach && c.operator.poll != nil {
		if detachErr := c.operator.Control(PollDetach); detachErr != nil {
			logger.Printf("NETPOLL: closeCallback[%v,%v] detach operator failed: %v", needLock, needDetach, detachErr)
		}
	}
	// An empty Value yields a nil head and the loop body never runs.
	head, _ := c.closeCallbacks.Load().(*callbackNode)
	for node := head; node != nil; node = node.pre {
		node.fn(c)
	}
	return nil
}

// register adds the connection's operator to its poller for readable events.
// On failure the error is logged, the connection is closed, and an
// ErrConnClosed exception carrying the original message is returned.
func (c *connection) register() (err error) {
	if err = c.operator.Control(PollReadable); err == nil {
		return nil
	}
	logger.Printf("NETPOLL: connection register failed: %v", err)
	c.Close()
	return Exception(ErrConnClosed, err.Error())
}

// isIdle implements gracefulExit.
// A connection is idle when no processing task holds the processing lock and
// both the input and the output buffer are empty.
func (c *connection) isIdle() (yes bool) {
	if !c.isUnlock(processing) {
		return false
	}
	if !c.inputBuffer.IsEmpty() {
		return false
	}
	return c.outputBuffer.IsEmpty()
}


================================================
FILE: connection_reactor.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"sync/atomic"
)

// ------------------------------------------ implement FDOperator ------------------------------------------

// onHup means close by poller.
//
// It claims the closing slot for the poller; if the connection was already
// closed by someone else it does nothing. Otherwise it wakes blocked readers
// and writers with a "peer close" error, runs the disconnect handling and,
// when an OnConnect or OnRequest callback is set, runs closeCallback.
// The p argument is not used. The returned error is always nil.
func (c *connection) onHup(p Poll) error {
	if !c.closeBy(poller) {
		return nil
	}
	c.triggerRead(Exception(ErrEOF, "peer close"))
	c.triggerWrite(Exception(ErrConnClosed, "peer close"))

	// call Disconnect callback first
	c.onDisconnect()

	// It depends on closing by user if OnConnect and OnRequest is nil, otherwise it needs to be released actively.
	// It can be confirmed that the OnRequest goroutine has been exited before closeCallback executing,
	// and it is safe to close the buffer at this time.
	onConnect := c.onConnectCallback.Load()
	onRequest := c.onRequestCallback.Load()
	needCloseByUser := onConnect == nil && onRequest == nil
	if !needCloseByUser {
		// already PollDetach when call OnHup
		c.closeCallback(true, false)
	}
	return nil
}

// onClose means close by user.
//
// If this call is the first to close the connection it wakes blocked readers
// and writers with a "self close" error and runs closeCallback with a detach
// request. If the poller closed the connection first, the closing slot is
// overwritten with `user` and closeCallback runs without detaching.
// In both cases closeCallback is asked to take the processing lock itself.
func (c *connection) onClose() error {
	// user code close the connection
	if c.closeBy(user) {
		c.triggerRead(Exception(ErrConnClosed, "self close"))
		c.triggerWrite(Exception(ErrConnClosed, "self close"))
		// Detach from poller when processing finished, otherwise it will cause race
		c.closeCallback(true, true)
		return nil
	}

	// closed by poller
	// still need to change closing status to `user` since OnProcess should not be processed again
	c.force(closing, user)

	// user code should actively close the connection to recycle resources.
	// poller already detached operator
	return c.closeCallback(true, false)
}

// closeBuffer recycles the input and output LinkBuffers.
//
// When neither OnConnect nor OnRequest is set, the poller may still be working
// on a buffer while the client closes the connection. In that case a buffer is
// recycled only if it is empty; an "unclean" non-empty buffer is left alone.
// The output barrier is returned to its pool together with the output buffer.
func (c *connection) closeBuffer() {
	connectCb, _ := c.onConnectCallback.Load().(OnConnect)
	requestCb, _ := c.onRequestCallback.Load().(OnRequest)
	managed := connectCb != nil || requestCb != nil

	if c.inputBuffer.Len() == 0 || managed {
		c.inputBuffer.Close()
	}
	if c.outputBuffer.Len() == 0 || managed {
		c.outputBuffer.Close()
		barrierPool.Put(c.outputBarrier)
	}
}

// inputs implements FDOperator.
// It books a slice from the input buffer using the current bookSize and maxSize,
// stores it in vs[0] and returns the one-element prefix of vs.
func (c *connection) inputs(vs [][]byte) (rs [][]byte) {
	booked := c.inputBuffer.book(c.bookSize, c.maxSize)
	vs[0] = booked
	rs = vs[:1]
	return rs
}

// inputAck implements FDOperator.
// It acknowledges that n bytes were read into the slice handed out by inputs.
//
// For n <= 0 the booking is acknowledged with zero bytes and nothing else
// happens. Otherwise bookSize doubles when the whole booked size was filled
// (while below mallocMax), maxSize tracks the buffered length capped at
// mallocMax, and finally the new data is dispatched: onRequest is invoked when
// the buffer held nothing before this read, and a waiting reader is woken when
// that is still needed and enough bytes are buffered.
// The returned error is always nil.
func (c *connection) inputAck(n int) (err error) {
	if n <= 0 {
		c.inputBuffer.bookAck(0)
		return nil
	}

	// Auto size bookSize: a completely filled booking suggests more data is waiting.
	if filled := n == c.bookSize; filled && c.bookSize < mallocMax {
		c.bookSize <<= 1
	}

	buffered, _ := c.inputBuffer.bookAck(n)
	if buffered > c.maxSize {
		c.maxSize = buffered
	}
	if c.maxSize > mallocMax {
		c.maxSize = mallocMax
	}

	// buffered == n means the buffer was empty before this read, so a new
	// onRequest round has to be started; it reports whether a trigger is still needed.
	needTrigger := buffered != n || c.onRequest()
	if !needTrigger {
		return nil
	}
	if wanted := int(atomic.LoadInt64(&c.waitReadSize)); buffered >= wanted {
		c.triggerRead(nil)
	}
	return nil
}

// outputs implements FDOperator.
// It returns the pending output slices collected into vs. When the output
// buffer is empty it switches the operator back to read-only monitoring via
// rw2r and returns a nil slice. The boolean result is always false.
func (c *connection) outputs(vs [][]byte) (rs [][]byte, _ bool) {
	if !c.outputBuffer.IsEmpty() {
		return c.outputBuffer.GetBytes(vs), false
	}
	c.rw2r()
	return nil, false
}

// outputAck implements FDOperator.
// It acknowledges that n bytes of output were written: the bytes are skipped
// and released from the output buffer, and once the buffer is empty write
// monitoring is dropped via rw2r. The returned error is always nil.
func (c *connection) outputAck(n int) (err error) {
	if sent := n; sent > 0 {
		c.outputBuffer.Skip(sent)
		c.outputBuffer.Release()
	}
	if !c.outputBuffer.IsEmpty() {
		return nil
	}
	c.rw2r()
	return nil
}

// rw2r removes the monitoring of write events (PollRW2R) and then signals
// the write trigger with a nil error. The result of the Control call is discarded.
func (c *connection) rw2r() {
	_ = c.operator.Control(PollRW2R)
	c.triggerWrite(nil)
}


================================================
FILE: connection_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"errors"
	"fmt"
	"net"
	"os"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"testing"
	"time"
)

// BenchmarkConnectionIO measures an echo round trip over a socket pair:
// wconn writes dataSize bytes, rconn's OnRequest handler echoes them back,
// and wconn reads the echo.
func BenchmarkConnectionIO(b *testing.B) {
	dataSize := 1024 * 16
	writeBuffer := make([]byte, dataSize)
	rfd, wfd := GetSysFdPairs()
	rconn, wconn := new(connection), new(connection)
	rconn.init(&netFD{fd: rfd}, &options{onRequest: func(ctx context.Context, connection Connection) error {
		read, _ := connection.Reader().Next(dataSize)
		_, _ = connection.Writer().WriteBinary(read)
		_ = connection.Writer().Flush()
		// Release the handler's own reader, and only after the echoed bytes were
		// flushed. Releasing wconn's reader here would race with the benchmark
		// loop below, which owns wconn, and would never reclaim rconn's input.
		_ = connection.Reader().Release()
		return nil
	}})
	wconn.init(&netFD{fd: wfd}, new(options))

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_, _ = wconn.WriteBinary(writeBuffer)
		_ = wconn.Flush()
		_, _ = wconn.Reader().Next(dataSize)
		_ = wconn.Reader().Release()
	}
}

// TestConnectionWrite writes `cycle` messages of `caps` bytes through wconn and
// checks that rconn's OnRequest handler receives exactly cycle*caps bytes.
func TestConnectionWrite(t *testing.T) {
	cycle, caps := 10000, 256
	msg, buf := make([]byte, caps), make([]byte, caps)
	var wg sync.WaitGroup
	wg.Add(1)
	var count int32
	expect := int32(cycle * caps)
	opts := &options{}
	// The handler counts received bytes and releases the main goroutine once
	// the expected total has been reached.
	opts.onRequest = func(ctx context.Context, connection Connection) error {
		n, err := connection.Read(buf)
		MustNil(t, err)
		if atomic.AddInt32(&count, int32(n)) >= expect {
			wg.Done()
		}
		return nil
	}

	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	rconn.init(&netFD{fd: r}, opts)
	wconn.init(&netFD{fd: w}, opts)

	for i := 0; i < cycle; i++ {
		n, err := wconn.Write(msg)
		MustNil(t, err)
		Equal(t, n, len(msg))
	}
	wg.Wait()
	Equal(t, atomic.LoadInt32(&count), expect)
	rconn.Close()
}

// TestConnectionLargeWrite pushes totalSize bytes through a socket pair in four
// WriteBinary calls and waits for the reader's OnRequest handler to see all of it.
// The test is unconditionally skipped; everything after t.Skipf does not run.
func TestConnectionLargeWrite(t *testing.T) {
	// ci machine don't have 4GB memory, so skip test
	t.Skipf("skip large write test for ci job")
	totalSize := 1024 * 1024 * 1024 * 4
	var wg sync.WaitGroup
	wg.Add(1)
	opts := &options{}
	// The handler returns without consuming until the full payload is buffered.
	opts.onRequest = func(ctx context.Context, connection Connection) error {
		if connection.Reader().Len() < totalSize {
			return nil
		}
		_, err := connection.Reader().Next(totalSize)
		MustNil(t, err)
		err = connection.Reader().Release()
		MustNil(t, err)
		wg.Done()
		return nil
	}

	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	rconn.init(&netFD{fd: r}, opts)
	wconn.init(&netFD{fd: w}, opts)

	msg := make([]byte, totalSize/4)
	for i := 0; i < 4; i++ {
		_, err := wconn.Writer().WriteBinary(msg)
		MustNil(t, err)
	}
	wg.Wait()

	rconn.Close()
}

// TestConnectionRead checks blocking reads without an OnRequest handler:
// a reader goroutine pulls cycleTime fixed-size packets with Next while the
// main goroutine writes the same number of packets.
func TestConnectionRead(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	err := rconn.init(&netFD{fd: r}, nil)
	MustNil(t, err)
	err = wconn.init(&netFD{fd: w}, nil)
	MustNil(t, err)

	size := 256
	cycleTime := 1000
	msg := make([]byte, size)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for i := 0; i < cycleTime; i++ {
			buf, err := rconn.Reader().Next(size)
			MustNil(t, err)
			Equal(t, len(buf), size)
			rconn.Reader().Release()
		}
	}()
	for i := 0; i < cycleTime; i++ {
		n, err := wconn.Write(msg)
		MustNil(t, err)
		Equal(t, n, len(msg))
	}
	wg.Wait()
	rconn.Close()
}

// TestConnectionIOReader tests the io.Reader Read method which uses readCopy internally.
// Verifies that Read after Peek preserves exposed buffer until Release.
// The payload is 64 bytes whose value equals their index, so both the copied
// data and the peeked slice can be checked byte by byte.
func TestConnectionIOReader(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn := &connection{}
	rconn.init(&netFD{fd: r}, nil)

	msg := make([]byte, 64)
	for i := range msg {
		msg[i] = byte(i)
	}
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Peek exposes the underlying buffer
		pk, err := rconn.Peek(16)
		MustNil(t, err)
		Equal(t, len(pk), 16)

		// Read copies without exposing
		buf := make([]byte, 64)
		n, err := rconn.Read(buf)
		MustNil(t, err)
		Equal(t, n, 64)
		for i := 0; i < 64; i++ {
			Equal(t, buf[i], byte(i))
		}

		// Peek data still valid before Release
		for i := 0; i < 16; i++ {
			Equal(t, pk[i], byte(i))
		}
		rconn.Release()
	}()
	// The raw write happens after the reader goroutine was started.
	syscall.Write(w, msg)
	wg.Wait()
	rconn.Close()
	syscall.Close(w)
}

// TestConnectionReadAfterClosed checks that data written just before the peer
// closes its end can still be read: the writer sends `size` bytes and closes
// immediately, and the reader's Next(size) must succeed.
func TestConnectionReadAfterClosed(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn := &connection{}
	rconn.init(&netFD{fd: r}, nil)
	size := 256
	msg := make([]byte, size)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		buf, err := rconn.Reader().Next(size)
		MustNil(t, err)
		Equal(t, len(buf), size)
	}()
	// Give the reader goroutine a moment to start before writing and closing.
	time.Sleep(time.Millisecond)
	syscall.Write(w, msg)
	syscall.Close(w)
	wg.Wait()
}

// TestConnectionWaitReadHalfPacket checks the waitReadSize bookkeeping when a
// reader asks for more data than is buffered: with half a packet buffered,
// Next(size) must publish waitReadSize == size while it waits, and
// waitReadSize must be back to 0 once Next returns.
func TestConnectionWaitReadHalfPacket(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn := &connection{}
	rconn.init(&netFD{fd: r}, nil)
	size := pagesize * 2
	msg := make([]byte, size)

	// write half packet
	syscall.Write(w, msg[:size/2])
	// wait poller reads buffer
	for rconn.inputBuffer.Len() <= 0 {
		runtime.Gosched()
	}

	// wait read full packet
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		buf, err := rconn.Reader().Next(size)
		Equal(t, atomic.LoadInt64(&rconn.waitReadSize), int64(0))
		MustNil(t, err)
		Equal(t, len(buf), size)
	}()

	// write left half packet
	// Spin until the reader goroutine has published the size it is waiting for.
	for atomic.LoadInt64(&rconn.waitReadSize) <= 0 {
		runtime.Gosched()
	}
	Equal(t, atomic.LoadInt64(&rconn.waitReadSize), int64(size))
	syscall.Write(w, msg[size/2:])
	wg.Wait()
}

// TestReadTimer pins the time.Timer behavior the read path relies on:
// stopping a timer before it fires reports true and leaves nothing in its channel.
func TestReadTimer(t *testing.T) {
	timer := time.NewTimer(time.Second)
	stopped := timer.Stop()
	MustTrue(t, stopped)
	time.Sleep(time.Millisecond)
	pending := len(timer.C)
	Equal(t, pending, 0)
}

// TestReadTrigger pins the channel behavior used for triggers: a non-blocking
// send into an empty channel of capacity one succeeds and leaves one element queued.
func TestReadTrigger(t *testing.T) {
	signal := make(chan int, 1)
	// Non-blocking send: falls through to default only when the channel is full.
	select {
	case signal <- 0:
	default:
	}
	queued := len(signal)
	Equal(t, queued, 1)
}

// writeAll writes all of buf to fd, looping over short writes.
// It returns the first error reported by syscall.Write, or nil once every
// byte has been written. An empty buf performs no write and returns nil.
func writeAll(fd int, buf []byte) error {
	for len(buf) > 0 {
		n, err := syscall.Write(fd, buf)
		// Decide on err rather than on the sign of n: the error value is the
		// documented failure signal, and n is only meaningful when err is nil.
		if err != nil {
			return err
		}
		buf = buf[n:]
	}
	return nil
}

// createTestTCPListener opens a TCP listener on the loopback interface with a
// port chosen by the OS (port 0) and fails the test if listening is not possible.
func createTestTCPListener(t *testing.T) net.Listener {
	const loopbackAnyPort = "127.0.0.1:0"
	listener, listenErr := net.Listen("tcp", loopbackAnyPort)
	MustNil(t, listenErr)
	return listener
}

// Large packet write test. The socket buffer is 2MB by default, here to verify
// whether Connection.Close can be executed normally after socket output buffer is full.
//
// A writer goroutine flushes round+1 buffers of bufferSize bytes while the main
// goroutine drains round*bufferSize bytes straight from the accepted fd and then
// closes the connection. Only the first `round` flushes are required to succeed;
// the last one may be interrupted by the close.
func TestLargeBufferWrite(t *testing.T) {
	ln := createTestTCPListener(t)
	defer ln.Close()
	address := ln.Addr().String()
	ln, err := ConvertListener(ln)
	MustNil(t, err)

	// trigger carries the accepted fd to the main goroutine and, afterwards,
	// the signal that lets the accept goroutine close the listener.
	trigger := make(chan int)
	defer close(trigger)
	go func() {
		for {
			conn, err := ln.Accept()
			if conn == nil && err == nil {
				continue
			}
			trigger <- conn.(*netFD).fd
			<-trigger
			err = ln.Close()
			MustNil(t, err)
			return
		}
	}()

	conn, err := DialConnection("tcp", address, time.Second)
	MustNil(t, err)
	rfd := <-trigger

	var wg sync.WaitGroup
	wg.Add(1)
	bufferSize := 2 * 1024 * 1024 // 2MB
	round := 128
	// start large buffer writing
	go func() {
		defer wg.Done()
		for i := 1; i <= round+1; i++ {
			_, err := conn.Writer().Malloc(bufferSize)
			MustNil(t, err)
			err = conn.Writer().Flush()
			if i <= round {
				MustNil(t, err)
			}
		}
	}()

	// wait socket buffer full
	time.Sleep(time.Millisecond * 100)
	buf := make([]byte, 1024)
	// NOTE(review): the error from syscall.Read is discarded; if Read can return
	// n < 0 here (for example when no data is available on a non-blocking fd),
	// `received` would decrease — confirm whether that is intended.
	for received := 0; received < round*bufferSize; {
		n, _ := syscall.Read(rfd, buf)
		received += n
	}
	// close success
	err = conn.Close()
	MustNil(t, err)
	wg.Wait()
	trigger <- 1
}

// TestConnectionTimeout exercises read/write timeouts and deadlines against a
// rate-limited peer. The server side is a plain net listener whose connections
// read and write a configurable number of bytes every `interval`, and all
// sockets use very small kernel buffers so the client notices the throttling quickly.
func TestConnectionTimeout(t *testing.T) {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	MustNil(t, err)
	defer ln.Close()

	const (
		bufsz    = 1 << 20
		interval = 10 * time.Millisecond
	)

	// calcRate converts a bytes-per-second rate into bytes per interval,
	// with a minimum of 1. It panics if the result would exceed bufsz.
	calcRate := func(n int32) int32 {
		v := n / int32(time.Second/interval)
		if v > bufsz {
			panic(v)
		}
		if v < 1 {
			return 1
		}
		return v
	}

	wn := int32(1) // size of each server-side Write, must <= bufsz
	setServerWriteRate := func(n int32) {
		atomic.StoreInt32(&wn, calcRate(n))
	}

	rn := int32(1) // size of each server-side Read, must <= bufsz
	setServerReadRate := func(n int32) {
		atomic.StoreInt32(&rn, calcRate(n))
	}

	// Accept loop: every connection gets one throttled reader goroutine and one
	// throttled writer goroutine. The loop ends when Accept fails, which happens
	// once the listener is closed.
	go func() {
		for {
			conn, err := ln.Accept()
			if err != nil {
				return
			}
			// set small SO_SNDBUF/SO_RCVBUF buffer for better control timeout test
			tcpconn := conn.(*net.TCPConn)
			tcpconn.SetReadBuffer(512)
			tcpconn.SetWriteBuffer(512)
			go func() {
				buf := make([]byte, bufsz)
				for {
					n := atomic.LoadInt32(&rn)
					_, err := conn.Read(buf[:int(n)])
					if err != nil {
						conn.Close()
						return
					}
					time.Sleep(interval)
				}
			}()

			go func() {
				buf := make([]byte, bufsz)
				for {
					n := atomic.LoadInt32(&wn)
					_, err := conn.Write(buf[:int(n)])
					if err != nil {
						conn.Close()
						return
					}
					time.Sleep(interval)
				}
			}()
		}
	}()

	// newConn dials the test server and shrinks the client's socket buffers.
	newConn := func() Connection {
		conn, err := DialConnection("tcp", ln.Addr().String(), time.Second)
		MustNil(t, err)
		fd := conn.(Conn).Fd()
		// set small SO_SNDBUF/SO_RCVBUF buffer for better control timeout test
		err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF, 512)
		MustNil(t, err)
		err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF, 512)
		MustNil(t, err)
		return conn
	}

	// mallocAndFlush queues sz bytes on conn and returns the result of Flush.
	mallocAndFlush := func(conn Connection, sz int) error {
		_, err := conn.Writer().Malloc(sz)
		MustNil(t, err)
		return conn.Writer().Flush()
	}

	t.Run("TestWriteTimeout", func(t *testing.T) {
		setServerReadRate(10 << 10) // 10KB/s

		conn := newConn()
		defer conn.Close()

		// write 1KB without timeout
		err := mallocAndFlush(conn, 1<<10) // ~100ms
		MustNil(t, err)

		// write 50ms timeout
		_ = conn.SetWriteTimeout(50 * time.Millisecond)
		err = mallocAndFlush(conn, 1<<20)
		MustTrue(t, errors.Is(err, ErrWriteTimeout))
	})

	t.Run("TestReadTimeout", func(t *testing.T) {
		setServerWriteRate(10 << 10) // 10KB/s

		conn := newConn()
		defer conn.Close()

		// read 1KB without timeout
		_, err := conn.Reader().Next(1 << 10) // ~100ms
		MustNil(t, err)

		// read 20KB ~ 2s, 50ms timeout
		_ = conn.SetReadTimeout(50 * time.Millisecond)
		_, err = conn.Reader().Next(20 << 10)
		MustTrue(t, errors.Is(err, ErrReadTimeout))
	})

	t.Run("TestWriteDeadline", func(t *testing.T) {
		setServerReadRate(10 << 10) // 10KB/s

		conn := newConn()
		defer conn.Close()

		// write 1KB without deadline
		// An already expired write deadline is set and then cleared through
		// SetDeadline with the zero time before the write is attempted.
		err := conn.SetWriteDeadline(time.Now())
		MustNil(t, err)
		err = conn.SetDeadline(time.Time{})
		MustNil(t, err)
		err = mallocAndFlush(conn, 1<<10) // ~100ms
		MustNil(t, err)

		// write with deadline
		err = conn.SetWriteDeadline(time.Now().Add(50 * time.Millisecond))
		MustNil(t, err)
		t0 := time.Now()
		err = mallocAndFlush(conn, 1<<20)
		MustTrue(t, errors.Is(err, ErrWriteTimeout))
		// The timeout must fire no later than 20ms after the 50ms deadline.
		MustTrue(t, time.Since(t0)-50*time.Millisecond < 20*time.Millisecond)

		// write deadline exceeded
		t1 := time.Now()
		err = mallocAndFlush(conn, 10<<10)
		MustTrue(t, errors.Is(err, ErrWriteTimeout))
		MustTrue(t, time.Since(t1) < 20*time.Millisecond)
	})

	t.Run("TestReadDeadline", func(t *testing.T) {
		setServerWriteRate(20 << 10) // 20KB/s

		conn := newConn()
		defer conn.Close()

		// read 1KB without deadline
		// An already expired read deadline is set and then cleared through
		// SetDeadline with the zero time before the read is attempted.
		err := conn.SetReadDeadline(time.Now())
		MustNil(t, err)
		err = conn.SetDeadline(time.Time{})
		MustNil(t, err)
		_, err = conn.Reader().Next(1 << 10)
		MustNil(t, err)

		// read 100KB with deadline
		err = conn.SetReadDeadline(time.Now().Add(50 * time.Millisecond))
		MustNil(t, err)
		t0 := time.Now()
		_, err = conn.Reader().Next(100 << 10)
		MustTrue(t, errors.Is(err, ErrReadTimeout))
		// The timeout must fire no later than 20ms after the 50ms deadline.
		MustTrue(t, time.Since(t0)-50*time.Millisecond < 20*time.Millisecond)

		// read 10KB, deadline exceeded
		t1 := time.Now()
		_, err = conn.Reader().Next(10 << 10)
		MustTrue(t, errors.Is(err, ErrReadTimeout))
		MustTrue(t, time.Since(t1) < 20*time.Millisecond)
	})
}

// TestConnectionLargeMemory is used to verify the memory usage in the large package scenario.
// It writes wn bytes in rn-sized chunks while a reader goroutine waits for the
// whole payload, and asserts that the bytes allocated since the start of the
// test stay within a fixed limit.
func TestConnectionLargeMemory(t *testing.T) {
	var start, end runtime.MemStats
	runtime.GC()
	runtime.ReadMemStats(&start)

	r, w := GetSysFdPairs()
	rconn := &connection{}
	rconn.init(&netFD{fd: r}, nil)

	var wg sync.WaitGroup
	// Wait for the reader goroutine before the test function returns, so that
	// it can never report through t after the test has completed. Deferring
	// the wait keeps the allocation measurement below at its original point.
	defer wg.Wait()
	rn, wn := 1024, 1*1024*1024

	wg.Add(1)
	go func() {
		defer wg.Done()
		_, err := rconn.Reader().Next(wn)
		MustNil(t, err)
	}()

	msg := make([]byte, rn)
	for i := 0; i < wn/rn; i++ {
		n, err := syscall.Write(w, msg)
		if err != nil {
			MustNil(t, err)
		}
		Equal(t, n, rn)
	}

	runtime.ReadMemStats(&end)
	alloc := end.TotalAlloc - start.TotalAlloc
	limit := uint64(4 * 1024 * 1024)
	Assert(t, alloc <= limit, fmt.Sprintf("alloc[%d] out of memory %d", alloc, limit))
}

// TestSetTCPNoDelay verifies that initializing a "tcp" connection enables
// TCP_NODELAY on its socket, and that setTCPNoDelay(fd, false) turns it off again.
func TestSetTCPNoDelay(t *testing.T) {
	fd, err := sysSocket(syscall.AF_INET, syscall.SOCK_STREAM, 0)
	MustNil(t, err)
	conn := &connection{}
	conn.init(&netFD{network: "tcp", fd: fd}, nil)

	// noDelay reads the current TCP_NODELAY option; a lookup error is ignored.
	noDelay := func() int {
		value, _ := syscall.GetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY)
		return value
	}

	MustTrue(t, noDelay() > 0)
	err = setTCPNoDelay(fd, false)
	MustNil(t, err)
	MustTrue(t, noDelay() == 0)
}

// TestConnectionUntil checks Reader().Until with a '\n' delimiter.
// Every 1002-byte message contains two delimiters (at index 500 and 1001), so
// each message yields two 501-byte results. The writer finishes with 100 bytes
// that contain no delimiter and then closes, so the last Until must return
// those 100 bytes together with ErrEOF.
func TestConnectionUntil(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	rconn.init(&netFD{fd: r}, nil)
	wconn.init(&netFD{fd: w}, nil)
	loopSize := 10000

	msg := make([]byte, 1002)
	msg[500], msg[1001] = '\n', '\n'
	go func() {
		for i := 0; i < loopSize; i++ {
			n, err := wconn.Write(msg)
			MustNil(t, err)
			MustTrue(t, n == len(msg))
		}
		// Trailing data without a delimiter, followed by close.
		wconn.Write(msg[:100])
		wconn.Close()
	}()

	for i := 0; i < loopSize*2; i++ {
		buf, err := rconn.Reader().Until('\n')
		MustNil(t, err)
		Equal(t, len(buf), 501)
		rconn.Reader().Release()
	}

	buf, err := rconn.Reader().Until('\n')
	Equal(t, len(buf), 100)
	Assert(t, errors.Is(err, ErrEOF), err)
}

// TestBookSizeLargerThanMaxSize sends a series of payloads whose size halves
// each time, from 128MB down to 1 byte, and checks that the reader gets every
// payload back intact. Each payload is filled with a single letter derived
// from its size so that payloads can be told apart.
func TestBookSizeLargerThanMaxSize(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	err := rconn.init(&netFD{fd: r}, nil)
	MustNil(t, err)
	err = wconn.init(&netFD{fd: w}, nil)
	MustNil(t, err)

	// prepare data
	maxSize := 1024 * 1024 * 128
	origin := make([][]byte, 0)
	for size := maxSize; size > 0; size = size >> 1 {
		ch := 'a' + byte(size%26)
		origin = append(origin, make([]byte, size))
		for i := 0; i < size; i++ {
			origin[len(origin)-1][i] = ch
		}
	}

	// read
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		idx := 0
		// Walk the same size sequence used to build origin.
		for size := maxSize; size > 0; size = size >> 1 {
			buf, err := rconn.Reader().Next(size)
			MustNil(t, err)
			Equal(t, string(buf), string(origin[idx]))
			err = rconn.Reader().Release()
			MustNil(t, err)
			idx++
		}
	}()

	// write
	for i := 0; i < len(origin); i++ {
		n, err := wconn.Write(origin[i])
		MustNil(t, err)
		Equal(t, n, len(origin[i]))
	}
	wg.Wait()
	rconn.Close()
	wconn.Close()
}

// TestConnDetach checks that a dialed connection can be detached and that its
// file descriptor then works as an ordinary net.Conn: after Detach the fd is
// wrapped with os.NewFile / net.FileConn and used for one write and one read
// against a simple server.
func TestConnDetach(t *testing.T) {
	ln := createTestTCPListener(t)
	defer ln.Close()
	address := ln.Addr().String()

	// accept => read => write
	var wg sync.WaitGroup
	go func() {
		for {
			conn, err := ln.Accept()
			if err != nil {
				return
			}
			if conn == nil {
				continue
			}
			wg.Add(1)
			go func() {
				defer wg.Done()
				buf := make([]byte, 1024)
				// slow read
				_, err := conn.Read(buf)
				if err != nil {
					return
				}
				time.Sleep(10 * time.Millisecond)
				_, err = conn.Write(buf)
				if err != nil {
					return
				}
			}()
		}
	}()

	// dial => detach => write => read
	c, err := DialConnection("tcp", address, time.Second)
	MustNil(t, err)
	conn := c.(*TCPConnection)
	err = conn.Detach()
	MustNil(t, err)

	// Hand the detached fd to the standard library.
	f := os.NewFile(uintptr(conn.fd), "netpoll-connection")
	defer f.Close()
	gonetconn, err := net.FileConn(f)
	MustNil(t, err)
	buf := make([]byte, 1024)
	_, err = gonetconn.Write(buf)
	MustNil(t, err)
	_, err = gonetconn.Read(buf)
	MustNil(t, err)

	err = gonetconn.Close()
	MustNil(t, err)
	err = ln.Close()
	MustNil(t, err)
	err = c.Close()
	MustNil(t, err)
	wg.Wait()
}

// TestParallelShortConnection opens `conns` client connections in parallel;
// each writes sizePerConn bytes, flushes and closes immediately. The server's
// OnRequest handler adds up everything it reads, and the total must equal
// conns*sizePerConn, which shows no data is lost when a peer closes right
// after sending.
func TestParallelShortConnection(t *testing.T) {
	ln := createTestTCPListener(t)
	defer ln.Close()
	address := ln.Addr().String()

	var received int64
	el, err := NewEventLoop(func(ctx context.Context, connection Connection) error {
		data, err := connection.Reader().Next(connection.Reader().Len())
		atomic.AddInt64(&received, int64(len(data)))
		if err != nil {
			return err
		}
		// t.Logf("conn[%s] received: %d, active: %v", connection.RemoteAddr(), len(data), connection.IsActive())
		return nil
	})
	MustNil(t, err)
	go func() {
		el.Serve(ln)
	}()
	defer el.Shutdown(context.Background())

	conns := 100
	sizePerConn := 1024
	totalSize := conns * sizePerConn
	var wg sync.WaitGroup
	for i := 0; i < conns; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			conn, err := DialConnection("tcp", address, time.Second)
			MustNil(t, err)
			n, err := conn.Writer().WriteBinary(make([]byte, sizePerConn))
			MustNil(t, err)
			MustTrue(t, n == sizePerConn)
			err = conn.Writer().Flush()
			MustNil(t, err)
			err = conn.Close()
			MustNil(t, err)
		}()
	}
	wg.Wait()

	// All clients are done; poll until the server has counted everything.
	// NOTE(review): the 100ms cap below makes the final assertion depend on
	// server-side scheduling — confirm it is generous enough for loaded CI machines.
	t0 := time.Now()
	for atomic.LoadInt64(&received) < int64(totalSize) {
		time.Sleep(time.Millisecond)
		if time.Since(t0) > 100*time.Millisecond { // max wait 100ms
			break
		}
	}
	Equal(t, atomic.LoadInt64(&received), int64(totalSize))
}

// TestConnectionServerClose exercises the callback sequence of a ping/pong
// exchange in which the server is the side that closes the connection first.
// A single WaitGroup counts every callback invocation expected on both sides,
// so the test only finishes once all of them have run for every connection.
func TestConnectionServerClose(t *testing.T) {
	ln := createTestTCPListener(t)
	defer ln.Close()
	address := ln.Addr().String()

	/*
		Client              Server
		- Client --- connect   --> Server
		- Client <-- [ping]   --- Server
		- Client --- [pong]   --> Server
		- Client <-- close     --- Server
		- Client --- close     --> Server
	*/
	const PING, PONG = "ping", "pong"
	var wg sync.WaitGroup
	el, err := NewEventLoop(
		// Server OnRequest: read the client's pong, then close the connection.
		func(ctx context.Context, connection Connection) error {
			t.Logf("server.OnRequest: addr=%s", connection.RemoteAddr())
			defer wg.Done()
			buf, err := connection.Reader().Next(len(PONG)) // pong
			Equal(t, string(buf), PONG)
			MustNil(t, err)
			err = connection.Reader().Release()
			MustNil(t, err)
			err = connection.Close()
			MustNil(t, err)
			return err
		},
		// Server OnConnect: verify the context produced by OnPrepare, send the
		// ping and register the server-side close callback.
		WithOnConnect(func(ctx context.Context, connection Connection) context.Context {
			t.Logf("server.OnConnect: addr=%s", connection.RemoteAddr())
			defer wg.Done()
			// check OnPrepare
			v := ctx.Value("prepare").(string)
			Equal(t, v, "true")

			_, err := connection.Writer().WriteBinary([]byte(PING))
			MustNil(t, err)
			err = connection.Writer().Flush()
			MustNil(t, err)
			connection.AddCloseCallback(func(connection Connection) error {
				t.Logf("server.CloseCallback: addr=%s", connection.RemoteAddr())
				wg.Done()
				return nil
			})
			return ctx
		}),
		// Server OnPrepare: return a context carrying a marker value that
		// OnConnect checks above.
		WithOnPrepare(func(connection Connection) context.Context {
			t.Logf("server.OnPrepare: addr=%s", connection.RemoteAddr())
			defer wg.Done()
			//nolint:staticcheck // SA1029 no built-in type string as key
			return context.WithValue(context.Background(), "prepare", "true")
		}),
	)
	MustNil(t, err)

	defer el.Shutdown(context.Background())
	go func() {
		err := el.Serve(ln)
		if err != nil {
			t.Logf("service end with error: %v", err)
		}
	}()

	// Client OnRequest: expect the ping, answer with pong, then block on a
	// further read until the server's close surfaces as ErrEOF.
	var clientOnRequest OnRequest = func(ctx context.Context, connection Connection) error {
		t.Logf("client.OnRequest: addr=%s", connection.LocalAddr())
		defer wg.Done()
		buf, err := connection.Reader().Next(len(PING))
		MustNil(t, err)
		Equal(t, string(buf), PING)

		_, err = connection.Writer().WriteBinary([]byte(PONG))
		MustNil(t, err)
		err = connection.Writer().Flush()
		MustNil(t, err)

		_, err = connection.Reader().Next(1) // server will not send any data, just wait for server close
		MustTrue(t, errors.Is(err, ErrEOF))  // should get EOF when server close

		return connection.Close()
	}
	conns := 10
	// Six wg.Done calls are expected per connection:
	// server: OnPrepare, OnConnect, OnRequest, CloseCallback
	// client: OnRequest, CloseCallback
	wg.Add(conns * 6)
	for i := 0; i < conns; i++ {
		go func() {
			conn, err := DialConnection("tcp", address, time.Second)
			MustNil(t, err)
			err = conn.SetOnRequest(clientOnRequest)
			MustNil(t, err)
			conn.AddCloseCallback(func(connection Connection) error {
				t.Logf("client.CloseCallback: addr=%s", connection.LocalAddr())
				defer wg.Done()
				return nil
			})
		}()
	}
	wg.Wait()
}

// TestWriterAfterClose checks that every Writer method invoked on an already
// closed connection reports an error rather than panicking.
func TestWriterAfterClose(t *testing.T) {
	rfd, wfd := GetSysFdPairs()
	rconn, wconn := new(connection), new(connection)
	rconn.init(&netFD{fd: rfd}, nil)
	wconn.init(&netFD{fd: wfd}, nil)

	err := wconn.Close()
	MustNil(t, err)

	// Spin until the write side reports itself inactive.
	for wconn.IsActive() {
		runtime.Gosched()
	}

	// One entry per Writer method under test; call returns that method's error.
	checks := []struct {
		name string
		call func() error
	}{
		{name: "Malloc", call: func() error {
			_, err := wconn.Malloc(1)
			return err
		}},
		{name: "MallocAck", call: func() error {
			return wconn.MallocAck(0)
		}},
		{name: "WriteBinary", call: func() error {
			_, err := wconn.WriteBinary([]byte("hi"))
			return err
		}},
		{name: "WriteString", call: func() error {
			_, err := wconn.WriteString("hi")
			return err
		}},
		{name: "WriteByte", call: func() error {
			return wconn.WriteByte('a')
		}},
		{name: "WriteDirect", call: func() error {
			return wconn.WriteDirect([]byte("hi"), 0)
		}},
		{name: "Flush", call: func() error {
			return wconn.Flush()
		}},
	}
	for i := range checks {
		tc := checks[i]
		t.Run(tc.name, func(t *testing.T) {
			// A panic from the method is converted into a test failure.
			defer func() {
				p := recover()
				if p == nil {
					return
				}
				t.Fatalf("Writer.%s panicked after Close: %v", tc.name, p)
			}()
			err := tc.call()
			Assert(t, err != nil, fmt.Sprintf("Writer.%s should return error after Close", tc.name))
		})
	}
	rconn.Close()
}

// TestConnectionDailTimeoutAndClose dials concurrently with a very short
// timeout against a listener that closes each accepted connection shortly
// after accepting it. A dial must either succeed (and the connection is then
// closed) or fail with an "i/o timeout" error.
func TestConnectionDailTimeoutAndClose(t *testing.T) {
	ln := createTestTCPListener(t)
	defer ln.Close()

	// Server side: accept, pause briefly, close; stop on the first Accept error.
	go func() {
		for {
			accepted, err := ln.Accept()
			if err != nil {
				return
			}
			time.Sleep(time.Millisecond)
			accepted.Close()
		}
	}()

	const dialers = 100
	var wg sync.WaitGroup
	wg.Add(dialers)
	for i := 0; i < dialers; i++ {
		go func() {
			defer wg.Done()
			conn, err := DialConnection("tcp", ln.Addr().String(), time.Millisecond)
			Assert(t, err == nil || strings.Contains(err.Error(), "i/o timeout"), err)
			if err != nil {
				return
			}
			// XXX: conn is always not nil ...
			conn.Close()
		}()
	}
	wg.Wait()
}


================================================
FILE: docs/guide/guide_cn.md
================================================
# 快速开始

本教程通过一些简单的 [示例][Examples] 帮助您开始使用 [Netpoll][Netpoll]，包括如何使用 [Server](#1-使用-server)、[Client](#2-使用-dialer) 和 [nocopy API](#3-使用-nocopy-api)。

## 1. 使用 Server

[这里][server-example] 是一个简单的 server 例子，接下来我们会解释它是如何构建的。

### 1.1 创建 Listener

首先我们需要一个 `Listener`，它可以是 `net.Listener` 或者 `netpoll.Listener`，两者都可以，依据你的代码情况自由选择。
创建 `Listener` 的过程如下：

```go
package main

import "net"

func main() {
	listener, err := net.Listen(network, address)
	if err != nil {
		panic("create net listener failed")
	}
	...
}
```

或者

```go
package main

import "github.com/cloudwego/netpoll"

func main() {
	listener, err := netpoll.CreateListener(network, address)
	if err != nil {
		panic("create netpoll listener failed")
	}
	...
}
```

### 1.2 创建 EventLoop

`EventLoop` 是一个事件驱动的调度器，一个真正的 NIO Server，负责连接管理、事件调度等。

参数说明:

* `OnRequest` 是用户应该自己实现来处理业务逻辑的接口。 [注释][netpoll.go] 详细描述了它的行为。
* `Option` 用于自定义 `EventLoop` 创建时的配置，下面的例子展示了它的用法。更多详情请参考 [options][netpoll_options.go]。

创建过程如下：

```go
package main

import (
	"time"
	"github.com/cloudwego/netpoll"
)

var eventLoop netpoll.EventLoop

func main() {
	...
	eventLoop, _ = netpoll.NewEventLoop(
		handle,
		netpoll.WithOnPrepare(prepare),
		netpoll.WithReadTimeout(time.Second),
	)
	...
}
```

### 1.3 运行 Server

`EventLoop` 通过绑定 `Listener` 来提供服务，如下所示。`Serve` 方法为阻塞式调用，直到发生 `panic` 等错误，或者由用户主动调用 `Shutdown` 时触发退出。

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

var eventLoop netpoll.EventLoop

func main() {
	...
	// start listen loop ...
	eventLoop.Serve(listener)
}
```

### 1.4 关闭 Server

`EventLoop` 提供了 `Shutdown` 功能，用于优雅地停止服务器。用法如下：

```go
package main

import (
	"context"
	"time"
	"github.com/cloudwego/netpoll"
)

var eventLoop netpoll.EventLoop

func main() {
	// stop server ...
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	eventLoop.Shutdown(ctx)
}
```

## 2. 使用 Dialer

[Netpoll][Netpoll] 也支持在 Client 端使用，提供了 `Dialer`，类似于 `net.Dialer`。同样的，[这里][client-example] 展示了一个简单的 Client 端示例，接下来我们详细介绍一下：

### 2.1 快速方式

与 [Net][net] 类似，[Netpoll][Netpoll] 提供了几个用于直接建立连接的公共方法，可以直接调用。 如：

```go
DialConnection(network, address string, timeout time.Duration) (connection Connection, err error)

DialTCP(ctx context.Context, network string, laddr, raddr *TCPAddr) (*TCPConnection, error)

DialUnix(network string, laddr, raddr *UnixAddr) (*UnixConnection, error)
```

### 2.2 创建 Dialer

[Netpoll][Netpoll] 还定义了`Dialer` 接口。 用法如下：（通常推荐使用上一节的快速方式）

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	// Dial a connection with Dialer.
	dialer := netpoll.NewDialer()
	conn, err := dialer.DialConnection(network, address, timeout)
	if err != nil {
		panic("dial netpoll connection failed")
	}
	...
}
```

## 3. 使用 Nocopy API

`Connection` 提供了 Nocopy API —— `Reader` 和 `Writer`，以避免频繁复制。下面介绍一下它们的简单用法。

```go
package main

type Connection interface {
	// Recommended nocopy APIs
	Reader() Reader
	Writer() Writer
	... // see code comments for more details
}
```

### 3.1 简单用法

Nocopy API 设计为两步操作。

使用 `Reader` 时，通过 `Next`、`Peek`、`ReadString` 等方法读取数据后，还需要主动调用 `Release` 方法释放 buffer（`Nocopy` 读取 buffer 的原地址，所以您必须主动再次确认 buffer 已经不再使用）。

同样，使用 `Writer` 时，首先需要分配一个 `[]byte` 来写入数据，然后调用 `Flush` 确认所有数据都已经写入。`Writer` 还提供了丰富的 API 来分配 buffer，例如 `Malloc`、`WriteString` 等。

下面是一些简单的读写数据的例子。 更多详情请参考 [说明][nocopy.go]。

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	var reader, writer = conn.Reader(), conn.Writer()
	
	// reading
	buf, _ := reader.Next(n)
	... parse the read data ...
	reader.Release()
	
	// writing
	var write_data []byte
	... make the write data ...
	alloc, _ := writer.Malloc(len(write_data))
	copy(alloc, write_data) // write data
	writer.Flush()
}
```

### 3.2 高阶用法

如果你想使用单个连接来发送（或接收）多组数据（如连接多路复用），那么你将面临数据打包和分包。在 [net][net] 上，这种工作一般都是通过复制来完成的。一个例子如下：

```go
package main

import (
	"net"
)

func main() {
	var conn net.Conn
	var buf = make([]byte, 8192)
	
	// reading
	for {
		n, _ := conn.Read(buf)
		... unpacking & handling ...
		var i int
		for i = 0; i <= n-pkgsize; i += pkgsize {
			pkg := append([]byte{}, buf[i:i+pkgsize]...)
			go func() {
				... handling pkg ...
			}
		}
		buf = append(buf[:0], buf[i:n]...)
	}
	
	// writing
	var write_datas <-chan []byte
	... packing write ...
	for {
		pkg := <-write_datas
		conn.Write(pkg)
	}
}
```

但是，[Netpoll][Netpoll] 不需要这样做，nocopy APIs 支持对 buffer 进行原地址操作（原地址组包和分包），并通过引用计数实现资源的自动回收和重用。

示例如下（使用方法 `Reader.Slice` 和 `Writer.Append`）：

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// reading
	reader := conn.Reader()
	for {
		... unpacking & handling ...
		pkg, _ := reader.Slice(pkgsize)
		go func() {
			... handling pkg ...
			pkg.Release()
		}
	}
	
	// writing
	var write_datas <-chan netpoll.Writer
	... packing write ...
	writer := conn.Writer()
	for {
		select {
		case pkg := <-write_datas:
			writer.Append(pkg)
		default:
			if writer.MallocLen() > 0 {
				writer.Flush()
			}
		}
	}
}
```

# 常见用法

## 1. 如何配置 poller 的数量 ？

`NumLoops` 表示 [Netpoll][Netpoll] 创建的 `epoll` 的数量，默认已经根据P的数量自动调整(`runtime.GOMAXPROCS(0)`)，用户一般不需要关心。

但是如果你的服务有大量的 I/O，你可能需要如下配置：

```go
package main

import (
	"runtime"
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.SetNumLoops(runtime.GOMAXPROCS(0))
}
```

## 2. 如何配置 poller 的连接负载均衡 ？

当 [Netpoll][Netpoll] 中有多个 poller 时，服务进程中的连接会负载均衡到每个 poller。

现在支持以下策略：

1. Random
   * 新连接将分配给随机选择的轮询器。
2. RoundRobin
   * 新连接将按顺序分配给轮询器。
     
[Netpoll][Netpoll] 默认使用 `RoundRobin`，用户可以通过以下方式更改：
     
```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.SetLoadBalance(netpoll.Random)
	
	// or
	netpoll.SetLoadBalance(netpoll.RoundRobin)
}
```

## 3. 如何配置 [gopool][gopool] ？

[Netpoll][Netpoll] 默认使用 [gopool][gopool] 作为 goroutine 池来优化 `栈扩张` 问题（RPC 服务常见问题）。

[gopool][gopool] 项目中已经详细解释了如何自定义配置，这里不再赘述。

当然，如果你的项目没有 `栈扩张` 问题，建议最好关闭 [gopool][gopool]，关闭方式如下：

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.DisableGopool()
}
```

## 4. 如何初始化新的连接 ？

Client 和 Server 端通过不同的方式初始化新连接。

1. 在 Server 端，定义了 `OnPrepare` 来初始化新链接，同时支持返回一个 `context`，可以传递给后续的业务处理并复用。`WithOnPrepare` 提供方法注册。当 Server 接收新连接时，会自动执行注册的 `OnPrepare` 方法来完成准备工作。示例如下：

```go
package main

import (
	"context"
	"github.com/cloudwego/netpoll"
)

func main() {
	// register OnPrepare
	var onPrepare netpoll.OnPrepare = prepare
	evl, _ := netpoll.NewEventLoop(handler, netpoll.WithOnPrepare(onPrepare))
	...
}

func prepare(connection netpoll.Connection) (ctx context.Context) {
	... prepare connection ...
	return
}
```

2. 在 Client 端，连接初始化需要由用户自行完成。 一般来说，`Dialer` 创建的新连接是可以由用户自行控制的，这与 Server 端被动接收连接不同。因此，用户不需要依赖触发器，可以自行初始化，如下所示：

```go
package main

import (
	"context"
	"github.com/cloudwego/netpoll"
)

func main() {
	conn, err := netpoll.DialConnection(network, address, timeout)
	if err != nil {
		panic("dial netpoll connection failed")
	}
	... prepare here directly ...
	prepare(conn)
	...
}

func prepare(connection netpoll.Connection) (ctx context.Context) {
	... prepare connection ...
	return
}
```

## 5. 如何配置连接超时 ？

[Netpoll][Netpoll] 现在支持两种类型的超时配置：

1. 读超时（`ReadTimeout`）
   * 为了保持与 `net.Conn` 相同的操作风格，`Connection.Reader` 也被设计为阻塞读取。 所以提供了读取超时（`ReadTimeout`）。
   * 读超时（`ReadTimeout`）没有默认值（默认无限等待），可以通过 `Connection` 或 `EventLoop.Option` 进行配置，例如：

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// 1. setting by Connection
	conn.SetReadTimeout(timeout)
	
	// or
	
	// 2. setting with Option
	netpoll.NewEventLoop(handler, netpoll.WithReadTimeout(timeout))
	...
}
```

2. 空闲超时（`IdleTimeout`）
   * 空闲超时（`IdleTimeout`）利用 `TCP KeepAlive` 机制来踢出死连接并减少维护开销。使用 [Netpoll][Netpoll] 时，一般不需要频繁创建和关闭连接，所以通常来说，空闲连接影响不大。当连接长时间处于非活动状态时，为了防止出现假死、对端挂起、异常断开等造成的死连接，在空闲超时（`IdleTimeout`）后，netpoll 会主动关闭连接。
   * 空闲超时（`IdleTimeout`）的默认配置为 `10min`，可以通过 `Connection` API 或 `EventLoop.Option` 进行配置，例如：

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// 1. setting by Connection
	conn.SetIdleTimeout(timeout)
	
	// or
	
	// 2. setting with Option
	netpoll.NewEventLoop(handler, netpoll.WithIdleTimeout(timeout))
	...
}
```

## 6. 如何配置连接的读事件回调 ？

`OnRequest` 是指连接上发生读事件时 [Netpoll][Netpoll] 触发的回调。在 Server 端，在创建 `EventLoop` 时，可以注册一个`OnRequest`，在每次连接数据到达时触发，进行业务处理。Client端默认没有 `OnRequest`，需要时可以通过 API 设置。例如：

```go
package main

import (
	"context"
	"github.com/cloudwego/netpoll"
)

func main() {
	var onRequest netpoll.OnRequest = handler
	
	// 1. on server side
	evl, _ := netpoll.NewEventLoop(onRequest, opts...)
	...
	
	// 2. on client side
	conn, _ := netpoll.DialConnection(network, address, timeout)
	conn.SetOnRequest(handler)
	...
}

func handler(ctx context.Context, connection netpoll.Connection) (err error) {
	... handling ...
	return nil
}
```

## 7. 如何配置连接的关闭回调 ？

`CloseCallback` 是指连接关闭时 [Netpoll][Netpoll] 触发的回调，用于在连接关闭后进行额外的处理。
[Netpoll][Netpoll] 能够感知连接状态。当连接被对端关闭或被自己清理时，会主动触发 `CloseCallback`，而不是由下一次调用 `Read` 或 `Write` 时返回错误（`net.Conn` 的方式）。
`Connection` 提供了添加 `CloseCallback` 的 API，已经添加的回调无法删除，支持多个回调。

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// add close callback
	var cb netpoll.CloseCallback = callback
	conn.AddCloseCallback(cb)
	...
}

func callback(connection netpoll.Connection) error {
	return nil
}
```

# 注意事项

## 1. 错误设置 NumLoops

如果你的服务器运行在物理机上，Go 进程创建的 P 个数就等于机器的 CPU 核心数。 但是 Server 可能不会使用这么多核心。在这种情况下，过多的 poller 会导致性能下降。

这里提供了以下几种解决方案：

1. 使用 `taskset` 命令来限制 CPU 个数，例如：

```shell
taskset -c 0-3 $run_your_server
```

2. 主动设置 P 的个数，例如：

```go
package main

import (
	"runtime"
)

func init() {
	runtime.GOMAXPROCS(num_you_want)
}
```

3. 主动设置 poller 的个数，例如：

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.SetNumLoops(num_you_want)
}
```

[Netpoll]: https://github.com/cloudwego/netpoll

[net]: https://github.com/golang/go/tree/master/src/net

[gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool

[Examples]: https://github.com/cloudwego/netpoll-examples

[server-example]: https://github.com/cloudwego/netpoll-examples/blob/main/server.go

[client-example]: https://github.com/cloudwego/netpoll-examples/blob/main/client.go

[netpoll.go]: https://github.com/cloudwego/netpoll/blob/main/netpoll.go

[netpoll_options.go]: https://github.com/cloudwego/netpoll/blob/main/netpoll_options.go

[nocopy.go]: https://github.com/cloudwego/netpoll/blob/main/nocopy.go


================================================
FILE: docs/guide/guide_en.md
================================================
# Tutorial

This tutorial gets you started with [Netpoll][Netpoll] through some simple [examples][Examples], including how to
use [Server](#1-use-server), [Client](#2-use-dialer) and [nocopy APIs](#3-use-nocopy-api).

## 1. Use Server

[Here][server-example] is a simple server demo, we will explain how it is constructed next.

### 1.1 Create Listener

First we need to get a `Listener`, it can be `net.Listener` or `netpoll.Listener`; there is no difference between them
for server usage. Create a `Listener` as shown below:

```go
package main

import "net"

func main() {
	listener, err := net.Listen(network, address)
	if err != nil {
		panic("create net listener failed")
	}
	...
}
```

or

```go
package main

import "github.com/cloudwego/netpoll"

func main() {
	listener, err := netpoll.CreateListener(network, address)
	if err != nil {
		panic("create netpoll listener failed")
	}
	...
}
```

### 1.2 New EventLoop

`EventLoop` is an event-driven scheduler, a real NIO Server, responsible for connection management, event scheduling,
etc.

params:

* `OnRequest` is an interface that users should implement by themselves to process business
  logic. [Code Comment][netpoll.go] describes its behavior in detail.
* `Option` is used to customize the configuration when creating `EventLoop`, and the following example shows its usage.
  For more details, please refer to [options][netpoll_options.go].

The creation process is as follows:

```go
package main

import (
	"time"
	"github.com/cloudwego/netpoll"
)

var eventLoop netpoll.EventLoop

func main() {
	...
	eventLoop, _ = netpoll.NewEventLoop(
		handle,
		netpoll.WithOnPrepare(prepare),
		netpoll.WithReadTimeout(time.Second),
	)
	...
}
```

### 1.3 Run Server

`EventLoop` provides services by binding `Listener`, as shown below.
`Serve` function will block until an error occurs, such as a panic or the user actively calls `Shutdown`.

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

var eventLoop netpoll.EventLoop

func main() {
	...
	// start listen loop ...
	eventLoop.Serve(listener)
}
```

### 1.4 Shutdown Server

`EventLoop` provides the `Shutdown` function, which is used to stop the server gracefully. The usage is as follows.

```go
package main

import (
	"context"
	"time"
	"github.com/cloudwego/netpoll"
)

var eventLoop netpoll.EventLoop

func main() {
	// stop server ...
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	eventLoop.Shutdown(ctx)
}
```

## 2. Use Dialer

[Netpoll][Netpoll] also has the ability to be used on the Client side. It provides `Dialer`, similar to `net.Dialer`.
Again, [here][client-example] is a simple client demo, and then we introduce it in detail.

### 2.1 The Fast Way

Similar to [Net][net], [Netpoll][Netpoll] provides several public functions for directly dialing a connection. such as:

```go
DialConnection(network, address string, timeout time.Duration) (connection Connection, err error)

DialTCP(ctx context.Context, network string, laddr, raddr *TCPAddr) (*TCPConnection, error)

DialUnix(network string, laddr, raddr *UnixAddr) (*UnixConnection, error)
```

### 2.2 Create Dialer

[Netpoll][Netpoll] also defines the `Dialer` interface. The usage is as follows:
(of course, you can usually use the fast way)

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	// Dial a connection with Dialer.
	dialer := netpoll.NewDialer()
	conn, err := dialer.DialConnection(network, address, timeout)
	if err != nil {
		panic("dial netpoll connection failed")
	}
	...
}
```

## 3. Use Nocopy API

`Connection` provides Nocopy APIs - `Reader` and `Writer`, to avoid frequent copying. Let’s introduce their simple
usage.

```go
package main

type Connection interface {
	// Recommended nocopy APIs
	Reader() Reader
	Writer() Writer
	... // see code comments for more details
}
```

### 3.1 Simple Usage

Nocopy APIs are designed as a two-step operation.

On `Reader`, after reading data through `Next`, `Peek`, `ReadString`, etc., you still have to actively call `Release` to
release the buffer(`Nocopy` reads the original address of the buffer, so you must take the initiative to confirm that
the buffer is no longer used).

Similarly, on `Writer`, you first need to allocate a buffer to write data, and then call `Flush` to confirm that all
data has been written.
`Writer` also provides rich APIs to allocate buffers, such as `Malloc`, `WriteString` and so on.

The following shows some simple examples of reading and writing data. For more details, please refer to
the [code comments][nocopy.go].

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	var reader, writer = conn.Reader(), conn.Writer()
	
	// reading
	buf, _ := reader.Next(n)
	... parse the read data ...
	reader.Release()
	
	// writing
	var write_data []byte
	... make the write data ...
	alloc, _ := writer.Malloc(len(write_data))
	copy(alloc, write_data) // write data
	writer.Flush()
}
```

### 3.2 Advanced Usage

If you want to use the connection to send (or receive) multiple sets of data, then you will face the work of packing and
unpacking the data.

On [net][net], this kind of work is generally done by copying. An example is as follows:

```go
package main

import (
	"net"
)

func main() {
	var conn net.Conn
	var buf = make([]byte, 8192)
	
	// reading
	for {
		n, _ := conn.Read(buf)
		... unpacking & handling ...
		var i int
		for i = 0; i <= n-pkgsize; i += pkgsize {
			pkg := append([]byte{}, buf[i:i+pkgsize]...)
			go func() {
				... handling pkg ...
			}
		}
		buf = append(buf[:0], buf[i:n]...)
	}
	
	// writing
	var write_datas <-chan []byte
	... packing write ...
	for {
		pkg := <-write_datas
		conn.Write(pkg)
	}
}
```

But this is not necessary in [Netpoll][Netpoll]: nocopy APIs support operations on the original address of the buffer,
and realize automatic recycling and reuse of resources through reference counting.

Examples are as follows(use function `Reader.Slice` and `Writer.Append`):

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// reading
	reader := conn.Reader()
	for {
		... unpacking & handling ...
		pkg, _ := reader.Slice(pkgsize)
		go func() {
			... handling pkg ...
			pkg.Release()
		}
	}
	
	// writing
	var write_datas <-chan netpoll.Writer
	... packing write ...
	writer := conn.Writer()
	for {
		select {
		case pkg := <-write_datas:
			writer.Append(pkg)
		default:
			if writer.MallocLen() > 0 {
				writer.Flush()
			}
		}
	}
}
```

# How To

## 1. How to configure the number of pollers ?

`NumLoops` represents the number of `epoll` created by [Netpoll][Netpoll], which has been automatically adjusted
according to the number of P (`runtime.GOMAXPROCS(0)`) by default, and users generally don't need to care.

But if your service has heavy I/O, you may need the following configuration:

```go
package main

import (
	"runtime"
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.SetNumLoops(runtime.GOMAXPROCS(0))
}
```

## 2. How to configure poller's connection loadbalance ?

When there are multiple pollers in [Netpoll][Netpoll], the connections in the service process will be loadbalanced to
each poller.

The following strategies are supported now:

1. Random
    * The new connection will be assigned to a randomly picked poller.
2. RoundRobin
    * The new connection will be assigned to the poller in order.

[Netpoll][Netpoll] uses `RoundRobin` by default, and users can change it in the following ways:

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.SetLoadBalance(netpoll.Random)
	
	// or
	netpoll.SetLoadBalance(netpoll.RoundRobin)
}
```

## 3. How to configure [gopool][gopool] ?

[Netpoll][Netpoll] uses [gopool][gopool] as the goroutine pool by default to optimize the `stack growth` problem that
generally occurs in RPC services.

The [gopool][gopool] project explains how to change its configuration, so we won't repeat it here.

Of course, if your project does not have a `stack growth` problem, it is best to close [gopool][gopool] as follows:

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.DisableGopool()
}
```

## 4. How to prepare a new connection ?

There are different ways to prepare a new connection on the client and server.

1. On the server side, `OnPrepare` is defined to prepare for the new connection, and it also supports returning
   a `context`, which can be reused in subsequent business processing.
   `WithOnPrepare` provides this registration. When the server accepts a new connection, it will automatically execute
   the registered `OnPrepare` function to complete the preparation work. The example is as follows:

```go
package main

import (
	"context"
	"github.com/cloudwego/netpoll"
)

func main() {
	// register OnPrepare
	var onPrepare netpoll.OnPrepare = prepare
	evl, _ := netpoll.NewEventLoop(handler, netpoll.WithOnPrepare(onPrepare))
	...
}

func prepare(connection netpoll.Connection) (ctx context.Context) {
	... prepare connection ...
	return
}
```

2. On the client side, the connection preparation needs to be completed by the user. Generally speaking, the connection
   created by `Dialer` can be controlled by the user, which is different from passively accepting the connection on the
   server side. Therefore, the user does not need to rely on a trigger and can simply prepare a new connection like this:

```go
package main

import (
	"context"
	"github.com/cloudwego/netpoll"
)

func main() {
	conn, err := netpoll.DialConnection(network, address, timeout)
	if err != nil {
		panic("dial netpoll connection failed")
	}
	... prepare here directly ...
	prepare(conn)
	...
}

func prepare(connection netpoll.Connection) (ctx context.Context) {
	... prepare connection ...
	return
}
```

## 5. How to configure connection timeout ?

[Netpoll][Netpoll] now supports two timeout configurations:

1. `Read Timeout`
    * In order to maintain the same operating style as `net.Conn`, `Connection.Reader` is also designed to block
      reading. So provide `Read Timeout`.
    * `Read Timeout` has no default value(wait infinitely), it can be configured via `Connection` or `EventLoop.Option`,
      for example:

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// 1. setting by Connection
	conn.SetReadTimeout(timeout)
	
	// or
	
	// 2. setting with Option
	netpoll.NewEventLoop(handler, netpoll.WithReadTimeout(timeout))
	...
}
```

2. `Idle Timeout`
    * `Idle Timeout` utilizes the `TCP KeepAlive` mechanism to kick out dead connections and reduce maintenance
      overhead. When using [Netpoll][Netpoll], there is generally no need to create and close connections frequently,
      and idle connections have little effect. When the connection is inactive for a long time, in order to prevent dead
      connection caused by suspended animation, hang of the opposite end, abnormal disconnection, etc., the connection
      will be actively closed after the `Idle Timeout`.
    * The default minimum value of `Idle Timeout` is `10min`, which can be configured through `Connection` API
      or `EventLoop.Option`, for example:

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// 1. setting by Connection
	conn.SetIdleTimeout(timeout)
	
	// or
	
	// 2. setting with Option
	netpoll.NewEventLoop(handler, netpoll.WithIdleTimeout(timeout))
	...
}
```

## 6. How to configure connection read event callback ?

`OnRequest` refers to the callback triggered by [Netpoll][Netpoll] when a read event occurs on the connection. On the
Server side, when creating the `EventLoop`, you can register an `OnRequest`, which will be triggered when each
connection data arrives and perform business processing. On the Client side, there is no `OnRequest` by default, and it
can be set via API when needed. E.g:

```go
package main

import (
	"context"
	"github.com/cloudwego/netpoll"
)

func main() {
	var onRequest netpoll.OnRequest = handler
	
	// 1. on server side
	evl, _ := netpoll.NewEventLoop(onRequest, opts...)
	...
	
	// 2. on client side
	conn, _ := netpoll.DialConnection(network, address, timeout)
	conn.SetOnRequest(handler)
	...
}

func handler(ctx context.Context, connection netpoll.Connection) (err error) {
	... handling ...
	return nil
}
```

## 7. How to configure the connection close callback ?

`CloseCallback` refers to the callback triggered by [Netpoll][Netpoll] when the connection is closed, which is used to
perform additional processing after the connection is closed.
[Netpoll][Netpoll] is able to perceive the connection status. When the connection is closed by peer or cleaned up by
self, it will actively trigger `CloseCallback` instead of returning an error on the next `Read` or `Write`(the way
of `net.Conn`).
`Connection` provides API for adding `CloseCallback`, callbacks that have been added cannot be removed, and multiple
callbacks are supported.

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func main() {
	var conn netpoll.Connection
	
	// add close callback
	var cb netpoll.CloseCallback = callback
	conn.AddCloseCallback(cb)
	...
}

func callback(connection netpoll.Connection) error {
	return nil
}
```

# Attention

## 1. Wrong setting of NumLoops

If your server is running on a physical machine, the number of P created by the Go process is equal to the number of
CPUs of the machine. But the server may not use so many cores. In this case, too many pollers will cause performance
degradation.

There are several solutions:

1. Use the `taskset` command to limit CPU usage, such as:

```shell
taskset -c 0-3 $run_your_server
```

2. Actively set the number of P, for instance:

```go
package main

import (
	"runtime"
)

func init() {
	runtime.GOMAXPROCS(num_you_want)
}
```

3. Actively set the number of pollers, e.g:

```go
package main

import (
	"github.com/cloudwego/netpoll"
)

func init() {
	netpoll.SetNumLoops(num_you_want)
}
```

[Netpoll]: https://github.com/cloudwego/netpoll

[net]: https://github.com/golang/go/tree/master/src/net

[gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool

[Examples]: https://github.com/cloudwego/netpoll-examples

[server-example]: https://github.com/cloudwego/netpoll-examples/blob/main/server.go

[client-example]: https://github.com/cloudwego/netpoll-examples/blob/main/client.go

[netpoll.go]: https://github.com/cloudwego/netpoll/blob/main/netpoll.go

[netpoll_options.go]: https://github.com/cloudwego/netpoll/blob/main/netpoll_options.go

[nocopy.go]: https://github.com/cloudwego/netpoll/blob/main/nocopy.go


================================================
FILE: docs/reference/design_cn.md
================================================
# TODO

================================================
FILE: docs/reference/design_en.md
================================================
# TODO

================================================
FILE: docs/reference/explain.md
================================================
# DATA RACE EXPLAIN
`Netpoll` declares different files by `//+build !race` and `//+build race` to avoid `DATA RACE` detection in some code.

The reason is that the `epoll` uses `unsafe.Pointer` to access the struct pointer, in order
 to improve performance. This operation is beyond the detection range of the `race detector`,
 so it is mistaken for a data race, although it is not actually a bug in the code.


================================================
FILE: eventloop.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"context"
	"net"
)

// An EventLoop is a network server.
type EventLoop interface {
	// Serve registers a listener and blocks while providing services, including listening to ports,
	// accepting connections and processing trans data. When an exception occurs or Shutdown is invoked,
	// Serve returns an error which describes the specific reason.
	Serve(ln net.Listener) error

	// Shutdown is used to exit gracefully.
	// It will close all idle connections on the server, but will not change the underlying pollers.
	//
	// Argument: ctx sets the waiting deadline, after which an error will be returned,
	// but connections still in progress will not be forcibly closed.
	Shutdown(ctx context.Context) error
}

/* The Connection Callback Sequence Diagram
| Connection State                     | Callback Function | Notes
|   Connected but not initialized      |    OnPrepare      | Conn is not registered into poller
|   Connected and initialized          |    OnConnect      | Conn is ready for read or write
|   Read first byte                    |    OnRequest      | Conn is ready for read or write
|   Peer closed but conn is active     |    OnDisconnect   | Conn access will race with OnRequest function
|   Self closed and conn is closed     |    CloseCallback  | Conn is destroyed

Execution Order:
  OnPrepare => OnConnect => OnRequest      => CloseCallback
                            OnDisconnect
Note: only OnRequest and OnDisconnect will be executed in parallel
*/

// OnPrepare is used to inject custom preparation at connection initialization.
// It is optional but important in some scenarios. For example, a qps limiter
// can be implemented by closing overloaded connections directly in OnPrepare.
//
// Return:
// the returned context becomes the argument of OnRequest.
// Usually, custom resources are initialized in OnPrepare and used in OnRequest.
//
// PLEASE NOTE:
// OnPrepare is executed before there is any data in the connection,
// so Reader() or Writer() cannot be used here, but may be supported in the future.
type OnPrepare func(connection Connection) context.Context

// OnConnect is called once the connection has been created.
// It may read, write or close the connection, and it may return a ctx which will be passed to OnRequest.
// OnConnect will not block the poller since it is executed asynchronously.
// OnRequest can only be executed after OnConnect has finished.
//
// An example usage in a TCP proxy scenario:
//
//	func onConnect(ctx context.Context, upstream netpoll.Connection) context.Context {
//		downstream, _ := netpoll.DialConnection("tcp", downstreamAddr, time.Second)
//		return context.WithValue(ctx, downstreamKey, downstream)
//	}
//
//	func onRequest(ctx context.Context, upstream netpoll.Connection) error {
//		downstream := ctx.Value(downstreamKey).(netpoll.Connection)
//		// forward data between upstream and downstream...
//		return nil
//	}
type OnConnect func(ctx context.Context, connection Connection) context.Context

// OnDisconnect is called once the connection is about to be closed.
// OnDisconnect must return as quickly as possible because it blocks the poller.
// OnDisconnect is different from CloseCallback; see the
// "The Connection Callback Sequence Diagram" section for the difference.
type OnDisconnect func(ctx context.Context, connection Connection)

// OnRequest defines the function for handling a connection. When data is sent by the connection peer,
// netpoll actively reads the data in LT mode and places it in the connection's input buffer.
// Generally, OnRequest starts handling the data in the following way:
//
//	func OnRequest(ctx context.Context, connection Connection) error {
//		input := connection.Reader().Next(n)
//		handling input data...
//		send, _ := connection.Writer().Malloc(l)
//		copy(send, output)
//		connection.Flush()
//		return nil
//	}
//
// OnRequest runs in a separate goroutine and
// it is guaranteed that at most one OnRequest is running for a connection at any time.
// The underlying logic is similar to:
//
//	go func() {
//		for !connection.Reader().IsEmpty() {
//			OnRequest(ctx, connection)
//		}
//	}()
//
// PLEASE NOTE:
// OnRequest must either eventually read all the input data or actively Close the connection,
// otherwise the goroutine will fall into a dead loop.
//
// Return: the error is unused and is ignored directly.
type OnRequest func(ctx context.Context, connection Connection) error


================================================
FILE: fd_operator.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"runtime"
	"sync/atomic"
)

// FDOperator is a collection of operations on a file descriptor.
type FDOperator struct {
	// FD is the file descriptor; the poll binds to it on registration.
	FD int

	// The FDOperator provides three event callbacks: reading, writing, and hang-up.
	// The poll actively fires them when the fd changes and does not check their return values.
	OnRead  func(p Poll) error
	OnWrite func(p Poll) error
	OnHup   func(p Poll) error

	// The following functions are required: they must be set when used, otherwise the call panics.
	// They are only called by the poll while it handles connection events.
	Inputs   func(vs [][]byte) (rs [][]byte)
	InputAck func(n int) (err error)

	// Outputs is locked if len(rs) > 0 and needs to be unlocked by OutputAck.
	// supportZeroCopy is not implemented and is ignored.
	Outputs   func(vs [][]byte) (rs [][]byte, supportZeroCopy bool)
	OutputAck func(n int) (err error)

	// poll is the registered location of the file descriptor.
	poll Poll

	// detached counts PollDetach requests so that Control forwards the detach only once.
	detached int32

	// private, used by operatorCache
	next  *FDOperator
	state int32 // CAS: 0(unused) 1(inuse) 2(do-done)
	index int32 // index in operatorCache
}

// Control forwards event to the poll this operator is registered on.
// A PollDetach event is forwarded only the first time it is requested;
// any repeated detach is dropped and reported as success (nil).
func (op *FDOperator) Control(event PollEvent) error {
	if event == PollDetach {
		// the counter is bumped on every detach request; only the first one goes through
		if atomic.AddInt32(&op.detached, 1) > 1 {
			return nil
		}
	}
	return op.poll.Control(op, event)
}

// Free hands the operator over to its poll by calling poll.Free.
// The operator must have a poll assigned, otherwise the call panics on the nil poll.
func (op *FDOperator) Free() {
	op.poll.Free(op)
}

// do tries to claim the operator by atomically moving state from 1 (inuse) to 2 (do-done).
// It reports whether the claim succeeded; a successful do is released again by done.
func (op *FDOperator) do() (can bool) {
	return atomic.CompareAndSwapInt32(&op.state, 1, 2)
}

// done unconditionally stores state 1 (inuse), releasing a claim taken by do.
func (op *FDOperator) done() {
	atomic.StoreInt32(&op.state, 1)
}

// inuse moves the operator from state 0 (unused) to state 1 (inuse).
// It returns at once when the operator is already in state 1; for any other
// state (i.e. 2, do-done) it yields the processor and retries.
func (op *FDOperator) inuse() {
	for {
		if atomic.CompareAndSwapInt32(&op.state, 0, 1) {
			return
		}
		if atomic.LoadInt32(&op.state) == 1 {
			return
		}
		runtime.Gosched()
	}
}

// unused moves the operator from state 1 (inuse) back to state 0 (unused).
// It returns at once when the operator is already in state 0; for any other
// state (i.e. 2, do-done) it yields the processor and retries.
func (op *FDOperator) unused() {
	for {
		if atomic.CompareAndSwapInt32(&op.state, 1, 0) {
			return
		}
		if atomic.LoadInt32(&op.state) == 0 {
			return
		}
		runtime.Gosched()
	}
}

// isUnused reports whether the operator is currently in state 0 (unused).
func (op *FDOperator) isUnused() bool {
	return atomic.LoadInt32(&op.state) == 0
}

// reset clears the fd, every callback, the poll and the detach counter.
// The cache bookkeeping fields (next, state, index) are left untouched.
func (op *FDOperator) reset() {
	op.FD = 0

	// event callbacks
	op.OnRead = nil
	op.OnWrite = nil
	op.OnHup = nil

	// buffer callbacks
	op.Inputs = nil
	op.InputAck = nil
	op.Outputs = nil
	op.OutputAck = nil

	op.poll = nil
	op.detached = 0
}


================================================
FILE: fd_operator_cache.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"runtime"
	"sync/atomic"
	"unsafe"
)

// newOperatorCache returns an empty operatorCache whose cache and freelist
// slices are pre-sized with a capacity of 1024 entries.
func newOperatorCache() *operatorCache {
	c := new(operatorCache)
	c.cache = make([]*FDOperator, 0, 1024)
	c.freelist = make([]int32, 0, 1024)
	return c
}

// operatorCache hands out FDOperator values from an intrusive free list.
// first is the head of the list of operators available to alloc, cache holds
// every operator ever created (addressed by FDOperator.index), and locked is
// the spin lock (see lock/unlock) taken by alloc and free around first/cache.
type operatorCache struct {
	first  *FDOperator
	cache  []*FDOperator
	locked int32
	// freelist stores the operators that may be freed.
	// To reduce GC pressure, only the operator index is stored here.
	// freelocked is the spin lock protecting freelist.
	freelocked int32
	freelist   []int32
}

// alloc pops one operator from the free list and returns it.
// When the list is empty it first creates a new batch of operators
// (block4k / sizeof(FDOperator), at least one), registers each of them in
// c.cache under its index and pushes them onto the free list.
func (c *operatorCache) alloc() *FDOperator {
	lock(&c.locked)
	if c.first == nil {
		const opSize = unsafe.Sizeof(FDOperator{})
		batch := block4k / opSize
		if batch == 0 {
			batch = 1
		}
		// indexes continue from the current length of the cache
		base := int32(len(c.cache))
		for i := uintptr(0); i < batch; i++ {
			fresh := &FDOperator{index: base + int32(i)}
			c.cache = append(c.cache, fresh)
			fresh.next, c.first = c.first, fresh
		}
	}
	head := c.first
	c.first = head.next
	unlock(&c.locked)
	return head
}

// freeable marks the operator as one that could be freed.
// Only the poller performs the real free action (see free); this method merely
// switches the operator to the unused state, resets its fields and records its
// index in the freelist under the freelocked spin lock.
func (c *operatorCache) freeable(op *FDOperator) {
	// reset all state
	op.unused()
	op.reset()
	lock(&c.freelocked)
	c.freelist = append(c.freelist, op.index)
	unlock(&c.freelocked)
}

// free returns every operator recorded in the freelist to the allocation list
// and then empties the freelist (keeping its capacity).
// freelocked is held for the whole call; locked is only taken when there is
// something to move.
func (c *operatorCache) free() {
	lock(&c.freelocked)
	defer unlock(&c.freelocked)

	pending := len(c.freelist)
	if pending == 0 {
		return
	}

	lock(&c.locked)
	for i := 0; i < pending; i++ {
		released := c.cache[c.freelist[i]]
		released.next = c.first
		c.first = released
	}
	c.freelist = c.freelist[:0]
	unlock(&c.locked)
}

// lock acquires the spin lock stored in *locked by switching it from 0 to 1.
// While the lock is held by someone else it yields the processor and retries.
func lock(locked *int32) {
	for {
		if atomic.CompareAndSwapInt32(locked, 0, 1) {
			return
		}
		runtime.Gosched()
	}
}

// unlock releases the spin lock stored in *locked by atomically storing 0.
func unlock(locked *int32) {
	const released int32 = 0
	atomic.StoreInt32(locked, released)
}


================================================
FILE: fd_operator_cache_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"runtime"
	"testing"
)

// go test -v -gcflags=-d=checkptr -run=TestPersistFDOperator
//
// TestPersistFDOperator checks that operators handed out by the cache keep their
// contents across GC cycles and that freeable/free move them through the freelist.
func TestPersistFDOperator(t *testing.T) {
	size := 2048
	opcache := newOperatorCache()

	// allocate `size` operators and tag each one with its position
	ops := make([]*FDOperator, 0, size)
	for fd := 0; fd < size; fd++ {
		op := opcache.alloc()
		op.FD = fd
		ops = append(ops, op)
	}
	Equal(t, len(opcache.freelist), 0)

	// force several GC cycles
	for round := 0; round < 4; round++ {
		runtime.GC()
	}

	// every operator must still carry its tag; mark them freeable one by one
	for i, op := range ops {
		Equal(t, op.FD, i)
		opcache.freeable(op)
		Equal(t, len(opcache.freelist), i+1)
	}
	Equal(t, len(opcache.freelist), size)

	// the real free empties the freelist while the cache keeps all operators
	opcache.free()
	Equal(t, len(opcache.freelist), 0)
	Assert(t, len(opcache.cache) >= size)
}

// BenchmarkPersistFDOperator1 measures a sequential alloc/freeable/free cycle
// on a single operatorCache.
func BenchmarkPersistFDOperator1(b *testing.B) {
	// build the cache before resetting the timer so that the setup cost and
	// its allocations are not attributed to the measured loop
	opcache := newOperatorCache()
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		op := opcache.alloc()
		opcache.freeable(op)
		opcache.free()
	}
}

// BenchmarkPersistFDOperator2 measures the alloc/freeable/free cycle on a single
// operatorCache shared by many parallel goroutines (parallelism factor 128).
func BenchmarkPersistFDOperator2(b *testing.B) {
	// build the cache before resetting the timer so that the setup cost and
	// its allocations are not attributed to the measured loop
	opcache := newOperatorCache()
	b.ReportAllocs()
	b.SetParallelism(128)
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			op := opcache.alloc()
			opcache.freeable(op)
			opcache.free()
		}
	})
}


================================================
FILE: go.mod
================================================
module github.com/cloudwego/netpoll

go 1.15

require (
	github.com/bytedance/gopkg v0.1.1
	github.com/cloudwego/gopkg v0.1.4
	golang.org/x/sys v0.19.0
)


================================================
FILE: go.sum
================================================
github.com/bytedance/gopkg v0.1.1 h1:3azzgSkiaw79u24a+w9arfH8OfnQQ4MHUt9lJFREEaE=
github.com/bytedance/gopkg v0.1.1/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
github.com/cloudwego/gopkg v0.1.4 h1:EoQiCG4sTonTPHxOGE0VlQs+sQR+Hsi2uN0qqwu8O50=
github.com/cloudwego/gopkg v0.1.4/go.mod h1:FQuXsRWRsSqJLsMVd5SYzp8/Z1y5gXKnVvRrWUOsCMI=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU=
golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o=
golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=


================================================
FILE: internal/runner/runner.go
================================================
/*
 * Copyright 2025 CloudWeGo Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package runner

import (
	"context"
	"os"
	"strconv"

	bgopool "github.com/bytedance/gopkg/util/gopool"
	cgopool "github.com/cloudwego/gopkg/concurrency/gopool"
)

// RunTask runs `f` in the background; `ctx` is optional and is only
// passed through to the underlying implementation.
// The variable is assigned in init and can be switched to a plain
// goroutine runner with UseGoRunTask.
var RunTask func(ctx context.Context, f func())

// goRunTask starts f in a new goroutine. ctx is accepted only to match the
// RunTask signature and is not used.
func goRunTask(ctx context.Context, f func()) {
	go f()
}

// init selects the goroutine pool behind RunTask.
// netpoll uses github.com/bytedance/gopkg/util/gopool by default; when the
// USE_CLOUDWEGO_GOPOOL environment variable parses as a true boolean, the pool
// from cloudwego/gopkg is used instead.
// For most users, using the 'go' keyword directly (see UseGoRunTask) is more suitable.
func init() {
	// a missing or malformed value parses as false, which keeps the default pool
	useCloudwego, _ := strconv.ParseBool(os.Getenv("USE_CLOUDWEGO_GOPOOL"))
	if useCloudwego {
		RunTask = cgopool.CtxGo
		return
	}
	RunTask = bgopool.CtxGo
}

// UseGoRunTask replaces RunTask with goRunTask, which creates
// a new goroutine for the given func, basically `go f()`.
// The assignment is a plain write to the package variable.
func UseGoRunTask() {
	RunTask = goRunTask
}

// SetPanicHandler sets the panic handler for the global goroutine pools.
// The handler is installed on both pool implementations (bytedance/gopkg and
// cloudwego/gopkg), so it applies whichever one init selected for RunTask.
func SetPanicHandler(f func(context.Context, interface{})) {
	bgopool.SetPanicHandler(f)
	cgopool.SetPanicHandler(f)
}


================================================
FILE: internal/runner/runner_test.go
================================================
/*
 * Copyright 2025 CloudWeGo Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package runner

import (
	"context"
	"sync"
	"testing"
)

// TestRunTask verifies that a task submitted through RunTask is executed, both
// with the runner selected by init and after switching to UseGoRunTask.
func TestRunTask(t *testing.T) {
	var (
		wg  sync.WaitGroup
		ctx = context.Background()
	)
	wg.Add(2)
	task := func() {
		wg.Done()
	}

	// first submission goes through the runner chosen at package init
	RunTask(ctx, task)

	// second submission goes through the plain goroutine runner
	UseGoRunTask()
	RunTask(ctx, task)

	wg.Wait()
}


================================================
FILE: lint.sh
================================================
#!/usr/bin/env bash

# Lint the repository with golangci-lint, invoked from the current directory.
golangci-lint run


================================================
FILE: mux/mux_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package mux

import (
	"testing"
)

// MustNil fails the test immediately unless val is nil.
// The check is delegated to Assert, which stops the test via t.Fatal and
// includes val in the failure message; no further check is needed afterwards.
// Note that val is compared as an interface value, so a typed nil pointer
// stored in val does not count as nil.
func MustNil(t *testing.T, val interface{}) {
	t.Helper()
	Assert(t, val == nil, val)
}

// MustTrue fails the test immediately when cond is false.
func MustTrue(t *testing.T, cond bool) {
	t.Helper()
	if cond {
		return
	}
	t.Fatal("assertion true failed.")
}

// Equal fails the test immediately when got and expect differ.
// The values are compared with ==, i.e. as interface values.
func Equal(t *testing.T, got, expect interface{}) {
	t.Helper()
	if got == expect {
		return
	}
	t.Fatalf("assertion equal failed, got=[%v], expect=[%v]", got, expect)
}

// Assert fails the test immediately when cond is false.
// Optional val arguments are appended to the failure message after the
// "assertion failed:" prefix.
func Assert(t *testing.T, cond bool, val ...interface{}) {
	t.Helper()
	if cond {
		return
	}
	if len(val) == 0 {
		t.Fatal("assertion failed")
		return
	}
	msg := make([]interface{}, 0, len(val)+1)
	msg = append(msg, "assertion failed:")
	msg = append(msg, val...)
	t.Fatal(msg...)
}


================================================
FILE: mux/shard_queue.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mux

import (
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"

	"github.com/cloudwego/netpoll"
	"github.com/cloudwego/netpoll/internal/runner"
)

/* DOC:
 * ShardQueue uses the netpoll's nocopy API to merge and send data.
 * The data flush is passively triggered by ShardQueue.Add and does not require user operations.
 * If there is an error in the data transmission, the connection will be closed.
 *
 * ShardQueue.Add: add the data to be sent.
 * NewShardQueue: create a queue with netpoll.Connection.
 * ShardSize: the recommended number of shards is 32; the variable itself is
 * initialised to runtime.GOMAXPROCS(0) by this package's init function.
 */
var ShardSize int

// init sets ShardSize to the current GOMAXPROCS value
// (GOMAXPROCS(0) only queries the setting, it does not change it).
func init() {
	ShardSize = runtime.GOMAXPROCS(0)
}

// NewShardQueue creates a ShardQueue with `size` shards that writes to conn.
// Every shard starts with an empty getter slice of capacity 64, and the
// trigger list is sized to hold one entry per shard.
func NewShardQueue(size int, conn netpoll.Connection) (queue *ShardQueue) {
	shards := make([][]WriterGetter, size)
	for i := range shards {
		shards[i] = make([]WriterGetter, 0, 64)
	}

	queue = &ShardQueue{
		conn:    conn,
		size:    int32(size),
		getters: shards,
		swap:    make([]WriterGetter, 0, 64),
		locks:   make([]int32, size),
	}
	queue.list = make([]int32, size)
	return queue
}

// WriterGetter is used to get a netpoll.Writer.
// When isNil is true the returned buf is skipped by the queue and not appended
// to the connection's writer.
type WriterGetter func() (buf netpoll.Writer, isNil bool)

// ShardQueue uses the netpoll's nocopy API to merge and send data.
// The data flush is passively triggered by ShardQueue.Add and does not require user operations.
// If there is an error in the data transmission, the connection will be closed.
// ShardQueue.Add: add the data to be sent.
//
// idx is the counter Add uses to pick the next shard, and each shard's getter
// slice is protected by the spin lock at the same position in locks.
type ShardQueue struct {
	conn      netpoll.Connection
	idx, size int32
	getters   [][]WriterGetter // len(getters) = size
	swap      []WriterGetter   // use for swap
	locks     []int32          // len(locks) = size
	queueTrigger
}

// Life-cycle states stored in queueTrigger.state.
const (
	// queueTrigger state
	active  = 0 // Add accepts new getters
	closing = 1 // Close was requested, pending work is still being drained
	closed  = 2 // Close finished
)

// queueTrigger holds the trigger bookkeeping embedded in ShardQueue.
//
// trigger counts the shards that were triggered and not yet processed: it is
// incremented in triggering and decreased by the drain task in foreach.
// runNum is used by foreach so that only one drain task is started at a time.
// w and r are the write and read positions inside list.
type queueTrigger struct {
	trigger  int32
	state    int32 // 0: active, 1: closing, 2: closed
	runNum   int32
	w, r     int32      // ptr of list
	list     []int32    // record the triggered shard
	listLock sync.Mutex // list total lock
}

// Add appends gts to q.getters[shard], where the shard is picked round-robin
// from an atomically incremented counter. Calls are ignored once the queue is
// no longer active (closing or closed).
//
// When the shard was empty before the append, the shard is triggered so that
// a drain of the queue gets scheduled.
func (q *ShardQueue) Add(gts ...WriterGetter) {
	if atomic.LoadInt32(&q.state) != active {
		return
	}
	// Reduce through uint32: once the int32 counter wraps around it becomes
	// negative, and a signed remainder would then yield a negative shard and
	// an out-of-range index below.
	shard := int32(uint32(atomic.AddInt32(&q.idx, 1)) % uint32(q.size))
	q.lock(shard)
	trigger := len(q.getters[shard]) == 0
	q.getters[shard] = append(q.getters[shard], gts...)
	q.unlock(shard)
	if trigger {
		q.triggering(shard)
	}
}

// Close stops the queue from accepting new getters and waits until all
// triggered work has finished.
// It returns an error when the queue is not active anymore, i.e. Close was
// already called. Otherwise it spins (yielding the processor) until either the
// state becomes closed or no trigger is pending, in which case it marks the
// queue closed itself.
func (q *ShardQueue) Close() error {
	if !atomic.CompareAndSwapInt32(&q.state, active, closing) {
		return fmt.Errorf("shardQueue has been closed")
	}
	// wait for all tasks finished
	for {
		if atomic.LoadInt32(&q.state) == closed {
			return nil
		}
		if atomic.LoadInt32(&q.trigger) == 0 {
			atomic.StoreInt32(&q.state, closed)
			return nil
		}
		runtime.Gosched()
	}
}

// triggering records shard as pending and makes sure a drain gets started.
// The write position w is advanced (wrapping at size) and the shard is stored
// in list under listLock. Only the caller that raises the trigger counter from
// 0 to 1 goes on to call foreach; every other caller returns immediately.
func (q *ShardQueue) triggering(shard int32) {
	q.listLock.Lock()
	q.w = (q.w + 1) % q.size
	q.list[q.w] = shard
	q.listLock.Unlock()

	if atomic.AddInt32(&q.trigger, 1) > 1 {
		return
	}
	q.foreach()
}

// foreach swap r & w. It's not concurrency safe.
//
// It schedules a background task (through runner.RunTask) that drains the
// triggered shards. runNum makes sure only one such task is started: a caller
// that does not raise runNum from 0 to 1 returns immediately.
func (q *ShardQueue) foreach() {
	if atomic.AddInt32(&q.runNum, 1) > 1 {
		return
	}
	runner.RunTask(nil, func() {
		var negNum int32 // is negative number of triggerNum
		// triggerNum is a snapshot of the pending trigger count; the loop runs
		// until a refreshed snapshot reports nothing left.
		for triggerNum := atomic.LoadInt32(&q.trigger); triggerNum > 0; {
			// advance the read position and pick the shard recorded there
			q.r = (q.r + 1) % q.size
			shared := q.list[q.r]

			// lock & swap
			// the shard gets the emptied swap slice, the task keeps the filled one
			q.lock(shared)
			tmp := q.getters[shared]
			q.getters[shared] = q.swap[:0]
			q.swap = tmp
			q.unlock(shared)

			// deal
			q.deal(q.swap)
			negNum--
			// once the whole snapshot is processed, subtract it from the shared
			// counter and take the result as the new snapshot
			if triggerNum+negNum == 0 {
				triggerNum = atomic.AddInt32(&q.trigger, negNum)
				negNum = 0
			}
		}
		q.flush()

		// quit & check again
		// runNum is released first; if new triggers arrived in the meantime a
		// fresh drain is scheduled
		atomic.StoreInt32(&q.runNum, 0)
		if atomic.LoadInt32(&q.trigger) > 0 {
			q.foreach()
			return
		}
		// if state is closing, change it to closed
		atomic.CompareAndSwapInt32(&q.state, closing, closed)
	})
}

// deal calls every getter in gts and appends the returned buffers to the
// connection's writer. Nothing is done when the connection is not active.
// Getters reporting isNil are skipped; on the first Append error the
// connection is closed and the remaining getters are not called.
func (q *ShardQueue) deal(gts []WriterGetter) {
	if !q.conn.IsActive() {
		return
	}
	out := q.conn.Writer()
	for i := range gts {
		buf, isNil := gts[i]()
		if isNil {
			continue
		}
		if err := out.Append(buf); err != nil {
			q.conn.Close()
			return
		}
	}
}

// flush flushes the connection's writer and closes the connection when the
// flush fails.
func (q *ShardQueue) flush() {
	if err := q.conn.Writer().Flush(); err != nil {
		q.conn.Close()
	}
}

// lock acquires the spin lock of the given shard, yielding the processor
// while the lock is held by someone else.
func (q *ShardQueue) lock(shard int32) {
	slot := &q.locks[shard]
	for {
		if atomic.CompareAndSwapInt32(slot, 0, 1) {
			return
		}
		runtime.Gosched()
	}
}

// unlock releases the spin lock of the given shard by atomically storing 0.
func (q *ShardQueue) unlock(shard int32) {
	atomic.StoreInt32(&q.locks[shard], 0)
}


================================================
FILE: mux/shard_queue_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package mux

import (
	"net"
	"testing"
	"time"

	"github.com/cloudwego/netpoll"
)

// TestShardQueue sends `count` packets of `pkgsize` bytes through a ShardQueue
// bound to a netpoll connection and checks that the peer receives all bytes.
func TestShardQueue(t *testing.T) {
	var svrConn net.Conn
	accepted := make(chan struct{})

	network, address := "tcp", "localhost:12345"
	ln, err := net.Listen(network, address)
	MustNil(t, err)
	stop := make(chan int, 1)
	defer close(stop)
	go func() {
		var err error
		for {
			select {
			case <-stop:
				err = ln.Close()
				MustNil(t, err)
				return
			default:
			}
			svrConn, err = ln.Accept()
			MustNil(t, err)
			accepted <- struct{}{}
		}
	}()

	conn, err := netpoll.DialConnection(network, address, time.Second)
	MustNil(t, err)
	// svrConn is written before the send on accepted, so it is safe to read afterwards
	<-accepted

	// test
	queue := NewShardQueue(4, conn)
	count, pkgsize := 16, 11
	for i := 0; i < count; i++ {
		var getter WriterGetter = func() (buf netpoll.Writer, isNil bool) {
			buf = netpoll.NewLinkBuffer(pkgsize)
			buf.Malloc(pkgsize)
			return buf, false
		}
		queue.Add(getter)
	}

	err = queue.Close()
	MustNil(t, err)
	total := count * pkgsize
	recv := make([]byte, total)
	// TCP is a byte stream: a single Read may return only part of the payload,
	// so keep reading until everything has arrived. The deadline keeps the test
	// from hanging when data is missing.
	MustNil(t, svrConn.SetReadDeadline(time.Now().Add(time.Second)))
	rn := 0
	for rn < total {
		n, rerr := svrConn.Read(recv[rn:])
		MustNil(t, rerr)
		rn += n
	}
	Equal(t, rn, total)
}

// BenchmarkShardQueue is a placeholder that is currently always skipped.
// TODO: need mock flush
func BenchmarkShardQueue(b *testing.B) {
	b.Skip()
}


================================================
FILE: net_dialer.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"net"
	"time"
)

// DialConnection connects to address on the given network using the package's
// default dialer (see NewDialer). The arguments are passed unchanged to the
// dialer's DialConnection method.
func DialConnection(network, address string, timeout time.Duration) (connection Connection, err error) {
	return defaultDialer.DialConnection(network, address, timeout)
}

// NewFDConnection creates a Connection initialized from an arbitrary fd.
// It is useful for writing unit tests for functions that take arguments of
// type netpoll.Connection. The typical usage is like:
//
//	rfd, wfd := netpoll.GetSysFdPairs()
//	rconn, _ = netpoll.NewFDConnection(rfd)
//	wconn, _ = netpoll.NewFDConnection(wfd)
//
// A nil Connection and the error are returned when the initialization fails.
func NewFDConnection(fd int) (Connection, error) {
	conn := &connection{}
	if err := conn.init(&netFD{fd: fd}, nil); err != nil {
		return nil, err
	}
	return conn, nil
}

// NewDialer returns a Dialer. Only TCP and unix sockets are supported for now.
func NewDialer() Dialer {
	return &dialer{}
}

// defaultDialer is the Dialer used by the package-level DialConnection.
var defaultDialer = NewDialer()

// dialer is the stateless Dialer implementation returned by NewDialer.
type dialer struct{}

// DialTimeout implements Dialer.
// It delegates to DialConnection and returns the resulting connection as a net.Conn.
func (d *dialer) DialTimeout(network, address string, timeout time.Duration) (net.Conn, error) {
	return d.DialConnection(network, address, timeout)
}

// DialConnection implements Dialer.
//
// A positive timeout bounds the dial through a context deadline; a zero or
// negative timeout means no deadline. The context is only handed to the TCP
// path; the unix path calls DialUnix without it. Networks other than
// tcp/tcp4/tcp6 and unix/unixgram/unixpacket yield net.UnknownNetworkError.
func (d *dialer) DialConnection(network, address string, timeout time.Duration) (connection Connection, err error) {
	ctx := context.Background()
	if timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, timeout)
		defer cancel()
	}

	switch network {
	case "unix", "unixgram", "unixpacket":
		target := &UnixAddr{
			UnixAddr: net.UnixAddr{Name: address, Net: network},
		}
		return DialUnix(network, nil, target)
	case "tcp", "tcp4", "tcp6":
		return d.dialTCP(ctx, network, address)
	}
	return nil, net.UnknownNetworkError(network)
}

// dialTCP resolves address and tries every resolved IP in order until one
// DialTCP call succeeds.
//
// The port is resolved with net.DefaultResolver.LookupPort and the host with
// net.DefaultResolver.LookupIPAddr. An empty host (for example ":12345") is
// dialed as a single zero-valued IP address. Addresses whose IP is non-nil and
// has no 4-byte form are dialed with network "tcp6", all others with "tcp".
//
// If ctx is done after a failed attempt, that attempt's error is returned
// immediately. Otherwise the error of the first failed address is returned
// once every address has been tried.
func (d *dialer) dialTCP(ctx context.Context, network, address string) (connection *TCPConnection, err error) {
	host, port, err := net.SplitHostPort(address)
	if err != nil {
		return nil, err
	}
	var portnum int
	if portnum, err = net.DefaultResolver.LookupPort(ctx, network, port); err != nil {
		return nil, err
	}
	var ipaddrs []net.IPAddr
	// host may be empty if address is of the form ":12345"
	if host == "" {
		ipaddrs = []net.IPAddr{{}}
	} else {
		ipaddrs, err = net.DefaultResolver.LookupIPAddr(ctx, host)
		if err != nil {
			return nil, err
		}
		if len(ipaddrs) == 0 {
			return nil, &net.DNSError{Err: "no such host", Name: host, IsNotFound: true}
		}
	}

	var firstErr error // The error from the first address is most relevant.
	// tcpAddr is reused across iterations; its fields are overwritten for each candidate.
	tcpAddr := &TCPAddr{}
	for _, ipaddr := range ipaddrs {
		tcpAddr.IP = ipaddr.IP
		tcpAddr.Port = portnum
		tcpAddr.Zone = ipaddr.Zone
		if ipaddr.IP != nil && ipaddr.IP.To4() == nil {
			connection, err = DialTCP(ctx, "tcp6", nil, tcpAddr)
		} else {
			connection, err = DialTCP(ctx, "tcp", nil, tcpAddr)
		}
		if err == nil {
			return connection, nil
		}
		select {
		case <-ctx.Done(): // check timeout error
			return nil, err
		default:
		}
		if firstErr == nil {
			firstErr = err
		}
	}

	// Fallback in case the loop finished without recording any error.
	if firstErr == nil {
		firstErr = &net.OpError{Op: "dial", Net: network, Source: nil, Addr: nil, Err: errMissingAddress}
	}
	return nil, firstErr
}

// sysDialer contains a Dial's parameters and configuration.
// It embeds net.Dialer and records the network and address being dialed.
type sysDialer struct {
	net.Dialer
	network, address string
}


================================================
FILE: net_dialer_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"fmt"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"testing"
	"time"
)

// TestDialerTCP checks that dialing a TCP address fails before a listener
// exists and succeeds, with the expected local/remote addresses, afterwards.
func TestDialerTCP(t *testing.T) {
	dialer := NewDialer()
	address := getTestAddress()
	// No listener has been created yet, so the dial is expected to fail.
	conn, err := dialer.DialTimeout("tcp", address, time.Second)
	MustTrue(t, err != nil)
	// The TCP path returns a typed nil *TCPConnection inside the interface,
	// which is why this type assertion does not panic.
	MustTrue(t, conn.(*TCPConnection) == nil)

	ln, err := CreateListener("tcp", address)
	MustNil(t, err)

	stop := make(chan int, 1)
	defer close(stop)

	// Accept loop: keeps polling the listener until stop is closed, then closes it.
	go func() {
		for {
			select {
			case <-stop:
				err := ln.Close()
				MustNil(t, err)
				return
			default:
			}
			conn, err := ln.Accept()
			if conn == nil && err == nil {
				continue
			}
		}
	}()

	conn, err = dialer.DialTimeout("tcp", address, time.Second)
	MustNil(t, err)
	MustTrue(t, strings.HasPrefix(conn.LocalAddr().String(), "127.0.0.1:"))
	Equal(t, conn.RemoteAddr().String(), address)
}

// TestDialerUnix checks that dialing the unix socket "tmp.sock" fails before a
// listener exists and succeeds, with the expected addresses, afterwards.
func TestDialerUnix(t *testing.T) {
	dialer := NewDialer()
	// No listener has been created yet, so the dial is expected to fail.
	conn, err := dialer.DialTimeout("unix", "tmp.sock", time.Second)
	MustTrue(t, err != nil)
	// NOTE(review): relies on the failed dial returning a typed nil
	// *UnixConnection inside the interface; a plain nil would panic here.
	MustTrue(t, conn.(*UnixConnection) == nil)

	ln, err := CreateListener("unix", "tmp.sock")
	MustNil(t, err)
	defer ln.Close()

	stop := make(chan int, 1)
	defer func() {
		close(stop)
		// give the accept goroutine a moment to observe stop
		time.Sleep(time.Millisecond)
	}()

	// Accept loop: keeps polling the listener until stop is closed, then closes it.
	go func() {
		for {
			select {
			case <-stop:
				err := ln.Close()
				MustNil(t, err)
				return
			default:
			}
			conn, err := ln.Accept()
			if conn == nil && err == nil {
				continue
			}
		}
	}()

	conn, err = dialer.DialTimeout("unix", "tmp.sock", time.Second)
	MustNil(t, err)
	// The expected string form of the unnamed local address differs by OS.
	if runtime.GOOS == "linux" {
		Equal(t, conn.LocalAddr().String(), "@")
	} else {
		Equal(t, conn.LocalAddr().String(), "")
	}
	Equal(t, conn.RemoteAddr().String(), "tmp.sock")
}

// TestDialerFdAlloc dials the same server 100 times. The server closes every
// connection as soon as it receives a request, and the client waits for each
// connection to become inactive before dialing again.
func TestDialerFdAlloc(t *testing.T) {
	address := getTestAddress()
	ln, err := CreateListener("tcp", address)
	MustNil(t, err)
	defer ln.Close()
	// The server handler closes the connection on the first request.
	el1, _ := NewEventLoop(func(ctx context.Context, connection Connection) error {
		connection.Close()
		return nil
	})
	go func() {
		el1.Serve(ln)
	}()
	ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second)
	defer cancel1()
	defer el1.Shutdown(ctx1)

	for i := 0; i < 100; i++ {
		conn, err := DialConnection("tcp", address, time.Second)
		MustNil(t, err)
		fd := conn.(*TCPConnection).fd
		conn.Write([]byte("hello world"))
		// spin until the peer's close has been observed locally
		for conn.IsActive() {
			runtime.Gosched()
		}
		time.Sleep(time.Millisecond)
		// Touch the fd number after the connection went inactive; the result is not checked.
		syscall.SetNonblock(fd, true)
	}
}

// TestFDClose dials a server twice and closes each client connection, once
// immediately and once after a one second pause.
func TestFDClose(t *testing.T) {
	address := getTestAddress()
	ln, err := CreateListener("tcp", address)
	MustNil(t, err)
	defer ln.Close()
	// The server handler closes the connection on the first request.
	el1, _ := NewEventLoop(func(ctx context.Context, connection Connection) error {
		connection.Close()
		return nil
	})
	go func() {
		el1.Serve(ln)
	}()
	ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second)
	defer cancel1()
	defer el1.Shutdown(ctx1)

	var fd int
	var conn Connection
	// first connection: close right away
	conn, err = DialConnection("tcp", address, time.Second)
	MustNil(t, err)
	fd = conn.(*TCPConnection).fd
	syscall.SetNonblock(fd, true)
	conn.Close()

	// second connection: close after a delay
	conn, err = DialConnection("tcp", address, time.Second)
	MustNil(t, err)
	fd = conn.(*TCPConnection).fd
	syscall.SetNonblock(fd, true)
	time.Sleep(time.Second)
	conn.Close()
}

// TestDialerThenClose is an fd/data race test that uses two servers and many
// concurrent dialers. 20 goroutines each run 50 rounds; every round dials
// server 1 and server 2 in turn and exchanges one message via mockDialerSend.
// The server handlers (mockDialerEventLoop) panic if a message tagged for the
// other server arrives.
func TestDialerThenClose(t *testing.T) {
	address1 := getTestAddress()
	address2 := getTestAddress()
	// server 1
	ln1, _ := createTestListener("tcp", address1)
	el1 := mockDialerEventLoop(1)
	go func() {
		el1.Serve(ln1)
	}()
	ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second)
	defer cancel1()
	defer el1.Shutdown(ctx1)

	// server 2
	ln2, _ := createTestListener("tcp", address2)
	el2 := mockDialerEventLoop(2)
	go func() {
		el2.Serve(ln2)
	}()
	ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second)
	defer cancel2()
	defer el2.Shutdown(ctx2)

	size := 20
	var wg sync.WaitGroup
	wg.Add(size)
	for i := 0; i < size; i++ {
		go func() {
			defer wg.Done()
			for i := 0; i < 50; i++ {
				// send server 1
				conn, err := DialConnection("tcp", address1, time.Second)
				if err == nil {
					mockDialerSend(1, &conn.(*TCPConnection).connection)
				}
				// send server 2
				conn, err = DialConnection("tcp", address2, time.Second)
				if err == nil {
					mockDialerSend(2, &conn.(*TCPConnection).connection)
				}
			}
		}()
	}
	wg.Wait()
}

// TestNewFDConnection wraps both descriptors returned by GetSysFdPairs with
// NewFDConnection, writes "hello" through one connection and expects to read
// the same five bytes from the other.
func TestNewFDConnection(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn, err := NewFDConnection(r)
	MustNil(t, err)
	wconn, err := NewFDConnection(w)
	MustNil(t, err)
	// Data is written on rconn and read back from wconn.
	_, err = rconn.Writer().WriteString("hello")
	MustNil(t, err)
	err = rconn.Writer().Flush()
	MustNil(t, err)
	buf, err := wconn.Reader().Next(5)
	MustNil(t, err)
	Equal(t, string(buf), "hello")
}

// mockDialerEventLoop builds the server side used by TestDialerThenClose.
//
// The handler reads up to 15 bytes, expects the payload to start with the
// decimal server index idx (it panics otherwise, which signals that data
// reached the wrong server) and replies with "<idx>-<local fd>". Any read or
// write error is printed and the connection is closed.
func mockDialerEventLoop(idx int) EventLoop {
	el, _ := NewEventLoop(func(ctx context.Context, conn Connection) (err error) {
		defer func() {
			if err != nil {
				fmt.Printf("Error: server%d conn closed: %s", idx, err.Error())
				conn.Close()
			}
		}()
		operator := conn.(*connection)
		fd := operator.fd
		msg := make([]byte, 15)
		n, err := operator.Read(msg)
		if err != nil {
			fmt.Printf("Error: conn[%d] server%d-read fail: %s", operator.fd, idx, err.Error())
			return err
		}
		if n < 1 {
			return nil
		}
		if string(msg[0]) != strconv.Itoa(idx) {
			panic(fmt.Sprintf("msg[%s] != [%d-xxx]", msg, idx))
		}

		// The remote fd is only used in the diagnostic message below. A short
		// read may deliver the payload without its "-<fd>" part, so guard the
		// index instead of panicking with "index out of range".
		var rfd int
		if ss := strings.Split(string(msg[:n]), "-"); len(ss) > 1 {
			rfd, _ = strconv.Atoi(ss[1])
		}
		_, err = operator.Write([]byte(fmt.Sprintf("%d-%d", idx, fd)))
		if err != nil {
			fmt.Printf("Error: conn[%d] rfd[%d] server%d-write fail: %s", operator.fd, rfd, idx, err.Error())
			return err
		}
		return nil
	})
	return el
}

// mockDialerSend is the client side used by TestDialerThenClose: it sends
// "<idx>-<fd>" on conn, then reads up to 15 bytes of reply. Failures are only
// printed; the connection is always closed before returning.
func mockDialerSend(idx int, conn *connection) {
	defer conn.Close()

	payload := []byte(fmt.Sprintf("%d-%d", idx, conn.fd))
	if _, werr := conn.Write(payload); werr != nil {
		fmt.Printf("Error: conn[%d] client%d write fail: %s", conn.fd, idx, werr.Error())
	}

	reply := make([]byte, 15)
	if _, rerr := conn.Read(reply); rerr != nil {
		fmt.Printf("Error: conn[%d] client%d Next fail: %s", conn.fd, idx, rerr.Error())
	}
}


================================================
FILE: net_io.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

import "syscall"

// ioread performs one vectored read on fd and normalises the result.
//
// return value:
// - n: n == 0 with err == nil means "nothing read, retry the syscall"
// - err: if not nil, connection should be closed; a zero-byte read without
// error is reported as ErrEOF.
func ioread(fd int, bs [][]byte, ivs []syscall.Iovec) (n int, err error) {
	n, err = readv(fd, bs, ivs)
	switch {
	case err == nil && n == 0: // means EOF
		return 0, Exception(ErrEOF, "")
	case err == syscall.EINTR, err == syscall.EAGAIN:
		return 0, nil
	}
	return n, err
}

// iosend performs one sendmsg on fd and normalises the result.
//
// return value:
// - n: n == 0 with err == nil means "nothing sent, retry the syscall"
// - err: if not nil, connection should be closed.
//
// EAGAIN and EINTR are both transient: neither means the connection is
// broken, so they are reported as a retry, mirroring ioread.
func iosend(fd int, bs [][]byte, ivs []syscall.Iovec, zerocopy bool) (n int, err error) {
	n, err = sendmsg(fd, bs, ivs, zerocopy)
	if err == syscall.EAGAIN || err == syscall.EINTR {
		return 0, nil
	}
	return n, err
}


================================================
FILE: net_listener.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

import (
	"errors"
	"net"
	"os"
	"syscall"
)

// CreateListener listens on addr with the standard library and converts the
// result into a netpoll Listener.
//
// UDP networks ("udp", "udp4", "udp6") are rejected with ErrUnsupported. When
// the conversion fails, the standard listener opened here is closed again so
// that the bound address is not leaked. A nil Listener is returned on error.
func CreateListener(network, addr string) (l Listener, err error) {
	if network == "udp" || network == "udp4" || network == "udp6" {
		return nil, Exception(ErrUnsupported, "UDP")
	}
	// tcp, tcp4, tcp6, unix
	ln, err := net.Listen(network, addr)
	if err != nil {
		return nil, err
	}
	if l, err = ConvertListener(ln); err != nil {
		// Nobody else holds ln at this point; release the socket.
		ln.Close()
		return nil, err
	}
	return l, nil
}

// ConvertListener converts net.Listener to Listener.
//
// A value that already implements Listener is returned unchanged. Otherwise
// the listener is wrapped, a descriptor is extracted from it via parseFD
// (only *net.TCPListener and *net.UnixListener are supported) and switched to
// non-blocking mode.
//
// On failure a nil Listener is returned. If the descriptor had already been
// extracted, the os.File that owns it is closed so that it does not leak; l
// itself is left untouched and remains the caller's responsibility.
func ConvertListener(l net.Listener) (nl Listener, err error) {
	if tmp, ok := l.(Listener); ok {
		return tmp, nil
	}
	ln := &listener{}
	ln.ln = l
	ln.addr = l.Addr()
	if err = ln.parseFD(); err != nil {
		return nil, err
	}
	if err = syscall.SetNonblock(ln.fd, true); err != nil {
		// Only release what this function created.
		ln.file.Close()
		return nil, err
	}
	return ln, nil
}

// compile-time check that *listener satisfies net.Listener
var _ net.Listener = &listener{}

// listener wraps a standard library listener and exposes its raw descriptor.
type listener struct {
	fd   int          // raw descriptor, taken from file.Fd() in parseFD
	addr net.Addr     // listener's local addr
	ln   net.Listener // tcp|unix listener
	file *os.File     // file obtained from ln.File() in parseFD
}

// Accept implements Listener.
//
// It performs a single accept(2) on the listening descriptor. When no
// connection is pending it returns (nil, nil); callers must be prepared for
// a nil connection without an error.
func (ln *listener) Accept() (net.Conn, error) {
	fd, sa, err := syscall.Accept(ln.fd)
	/* https://man7.org/linux/man-pages/man2/accept.2.html
	EAGAIN or EWOULDBLOCK
	  The socket is marked nonblocking and no connections are
	  present to be accepted.  POSIX.1-2001 and POSIX.1-2008
	  allow either error to be returned for this case, and do
	  not require these constants to have the same value, so a
	  portable application should check for both possibilities.
	*/
	switch {
	case err == syscall.EAGAIN, err == syscall.EWOULDBLOCK:
		return nil, nil
	case err != nil:
		return nil, err
	}
	return &netFD{
		fd:         fd,
		localAddr:  ln.addr,
		network:    ln.addr.Network(),
		remoteAddr: sockaddrToAddr(sa),
	}, nil
}

// Close implements Listener.
//
// It releases the raw descriptor and then closes the wrapped listener. Close
// is best-effort: errors from the individual steps are deliberately ignored
// and nil is always returned, so calling it more than once is harmless.
//
// When ln.file is set, ln.fd was obtained from ln.file.Fd() and is owned by
// that file, so only the file is closed. Closing the raw fd as well would
// close the same descriptor twice, and on a repeated Close could close an
// unrelated descriptor that has since reused the number.
func (ln *listener) Close() error {
	switch {
	case ln.file != nil:
		ln.file.Close()
	case ln.fd != 0:
		// no owning file: the raw descriptor has to be closed directly
		syscall.Close(ln.fd)
	}
	if ln.ln != nil {
		ln.ln.Close()
	}
	return nil
}

// Addr implements Listener and reports the stored local address.
func (ln *listener) Addr() net.Addr {
	local := ln.addr
	return local
}

// Fd implements Listener and reports the raw listening descriptor.
func (ln *listener) Fd() (fd int) {
	fd = ln.fd
	return fd
}

// parseFD obtains an *os.File from the wrapped TCP or unix listener and
// records both the file and its descriptor on ln. Any other listener type is
// rejected with an error.
func (ln *listener) parseFD() (err error) {
	var f *os.File
	switch inner := ln.ln.(type) {
	case *net.UnixListener:
		f, err = inner.File()
	case *net.TCPListener:
		f, err = inner.File()
	default:
		return errors.New("listener type can't support")
	}
	ln.file = f
	if err != nil {
		return err
	}
	ln.fd = int(f.Fd())
	return nil
}


================================================
FILE: net_listener_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

import (
	"context"
	"net"
	"sync/atomic"
	"testing"
	"time"
)

// TestListenerDialer exercises a raw listener together with the netpoll
// dialer: the server side echoes a 10-byte message once triggered, and the
// client side checks that its OnRequest handler and its close callback have
// both run.
func TestListenerDialer(t *testing.T) {
	network := "tcp"
	addr := getTestAddress()
	ln, err := CreateListener(network, addr)
	MustNil(t, err)
	defer ln.Close()
	trigger := make(chan int)
	msg := []byte("0123456789")

	// Accept loop: a nil connection with a nil error means nothing is pending yet.
	go func() {
		for {
			conn, err := ln.Accept()
			if conn == nil && err == nil {
				continue
			}
			if err != nil {
				return
			}
			// Per-connection echo, started only after the client signals trigger.
			go func(conn net.Conn) {
				<-trigger
				buf := make([]byte, 10)
				n, err := conn.Read(buf)
				MustNil(t, err)
				Equal(t, n, len(msg))
				Equal(t, string(buf[:n]), string(msg))
				n, err = conn.Write(buf)
				MustNil(t, err)
				Equal(t, n, len(msg))
			}(conn)
		}
	}()

	// trigger
	var closed, read int32

	dialer := NewDialer()
	// callback records that the close callback fired
	callback := func(connection Connection) error {
		atomic.StoreInt32(&closed, 1)
		return nil
	}
	// onRequest records that data arrived and then closes the connection
	onRequest := func(ctx context.Context, connection Connection) error {
		atomic.StoreInt32(&read, 1)
		err := connection.Close()
		MustNil(t, err)
		return err
	}
	for i := 0; i < 10; i++ {
		conn, err := dialer.DialConnection(network, addr, time.Second)
		// a failed dial skips this round instead of failing the test
		if err != nil {
			continue
		}
		conn.AddCloseCallback(callback)
		conn.SetOnRequest(onRequest)

		MustNil(t, err)
		n, err := conn.Write(msg)
		MustNil(t, err)
		Equal(t, n, len(msg))
		time.Sleep(10 * time.Millisecond)
		trigger <- 1
		time.Sleep(10 * time.Millisecond)
		Equal(t, atomic.LoadInt32(&read), int32(1))
		Equal(t, atomic.LoadInt32(&closed), int32(1))
	}
}

// TestConvertListener converts a standard unix listener with ConvertListener
// and checks that closing the converted listener reports no error. Any
// failure panics instead of going through the testing.T helpers.
func TestConvertListener(t *testing.T) {
	network, address := "unix", "mock.test.sock"
	ln, err := net.Listen(network, address)
	if err != nil {
		panic(err)
	}
	udsln, _ := ln.(*net.UnixListener)
	// udsln.SetUnlinkOnClose(false)

	nln, err := ConvertListener(udsln)
	if err != nil {
		panic(err)
	}
	err = nln.Close()
	if err != nil {
		panic(err)
	}
}


================================================
FILE: net_netfd.go
================================================
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”).
// All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors.

//go:build aix || darwin || dragonfly || freebsd || linux || nacl || netbsd || openbsd || solaris
// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris

package netpoll

import (
	"context"
	"errors"
	"net"
	"os"
	"runtime"
	"syscall"
	"time"
)

// noDeadline is just the zero time.Time, kept as a named value for
// readability with functions taking too many parameters.
var noDeadline = time.Time{}

// netFD is a thin wrapper around a raw socket descriptor plus the metadata
// recorded while creating or accepting it.
type netFD struct {
	// file descriptor
	fd int
	// pd is the poll descriptor used while a non-blocking connect is in
	// flight: connect() creates it and resets it to nil before returning.
	// When calling netFD.dial(), fd will be registered into poll in some scenarios, such as dialing tcp socket,
	// but not in other scenarios, such as dialing unix socket.
	// This leads to different behavior when registering with the poller afterwards, so this field is used to mark it.
	pd *pollDesc
	// closed marks whether fd has expired; Close increments it atomically and
	// only the first call proceeds.
	closed uint32
	// Whether this is a streaming descriptor. Immutable.
	isStream bool
	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	zeroReadIsEOF bool
	family        int    // AF_INET, AF_INET6, syscall.AF_UNIX
	sotype        int    // syscall.SOCK_STREAM, syscall.SOCK_DGRAM, syscall.SOCK_RAW
	isConnected   bool   // handshake completed or use of association with peer
	network       string // tcp, tcp4, tcp6, unix, unixgram, unixpacket
	localAddr     net.Addr
	remoteAddr    net.Addr
	// for detaching conn from poller; while set, Close does not close fd
	detaching bool
}

// newNetFD builds a netFD for an already created socket. isStream is derived
// from sotype being SOCK_STREAM; zeroReadIsEOF is true for every socket type
// except SOCK_DGRAM and SOCK_RAW.
func newNetFD(fd, family, sotype int, net string) *netFD {
	datagramLike := sotype == syscall.SOCK_DGRAM || sotype == syscall.SOCK_RAW
	return &netFD{
		fd:            fd,
		network:       net,
		family:        family,
		sotype:        sotype,
		isStream:      sotype == syscall.SOCK_STREAM,
		zeroReadIsEOF: !datagramLike,
	}
}

// dial optionally binds the socket to laddr, connects it to raddr and then
// records the local and remote addresses on c.
//
// Either address may be nil. If dial returns an error, the caller must close
// the netFD itself (netFD.Close); dial does not close the descriptor.
func (c *netFD) dial(ctx context.Context, laddr, raddr sockaddr) (err error) {
	var lsa syscall.Sockaddr
	if laddr != nil {
		if lsa, err = laddr.sockaddr(c.family); err != nil {
			return err
		} else if lsa != nil {
			// bind local address
			if err = syscall.Bind(c.fd, lsa); err != nil {
				return os.NewSyscallError("bind", err)
			}
		}
	}
	var rsa syscall.Sockaddr  // remote address from the user
	var crsa syscall.Sockaddr // remote address we actually connected to
	if raddr != nil {
		if rsa, err = raddr.sockaddr(c.family); err != nil {
			return err
		}
	}
	// remote address we actually connected to
	if crsa, err = c.connect(ctx, lsa, rsa); err != nil {
		return err
	}
	c.isConnected = true

	// Record the local and remote addresses from the actual socket.
	// Get the local address by calling Getsockname.
	// For the remote address, use
	// 1) the one returned by the connect method, if any; or
	// 2) the one from Getpeername, if it succeeds; or
	// 3) the one passed to us as the raddr parameter.
	// The Getsockname error is ignored; lsa is passed on as returned.
	lsa, _ = syscall.Getsockname(c.fd)
	c.localAddr = sockaddrToAddr(lsa)
	if crsa != nil {
		c.remoteAddr = sockaddrToAddr(crsa)
	} else if crsa, _ = syscall.Getpeername(c.fd); crsa != nil {
		c.remoteAddr = sockaddrToAddr(crsa)
	} else {
		c.remoteAddr = sockaddrToAddr(rsa)
	}
	return nil
}

// connect issues connect(2) on c.fd towards ra and, if the connection does
// not complete immediately, waits for it through a temporary pollDesc.
//
// It returns the peer address reported by Getpeername when the connection
// was confirmed that way, and a nil address otherwise (the caller then
// determines the remote address itself). The la parameter is not used here.
// Context cancellation or expiry is reported through mapErr on the immediate
// path and by WaitWrite on the waiting path.
func (c *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (rsa syscall.Sockaddr, retErr error) {
	// Do not need to call c.writing here,
	// because c is not yet accessible to user,
	// so no concurrent operations are possible.
	switch err := syscall.Connect(c.fd, ra); err {
	case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
		// connection still in progress: fall out of the switch and wait below
	case nil, syscall.EISCONN:
		// connected immediately; still honour a context that is already done
		select {
		case <-ctx.Done():
			return nil, mapErr(ctx.Err())
		default:
		}
		return nil, nil
	case syscall.EINVAL:
		// On Solaris we can see EINVAL if the socket has
		// already been accepted and closed by the server.
		// Treat this as a successful connection--writes to
		// the socket will see EOF.  For details and a test
		// case in C see https://golang.org/issue/6828.
		if runtime.GOOS == "solaris" {
			return nil, nil
		}
		fallthrough
	default:
		return nil, os.NewSyscallError("connect", err)
	}

	// The poll descriptor only lives for the duration of this wait loop.
	c.pd = newPollDesc(c.fd)
	defer func() {
		// free operator to avoid leak
		c.pd.operator.Free()
		c.pd = nil
	}()
	for {
		// Performing multiple connect system calls on a
		// non-blocking socket under Unix variants does not
		// necessarily result in earlier errors being
		// returned. Instead, once runtime-integrated network
		// poller tells us that the socket is ready, get the
		// SO_ERROR socket option to see if the connection
		// succeeded or failed. See issue 7474 for further
		// details.
		if err := c.pd.WaitWrite(ctx); err != nil {
			return nil, err
		}
		nerr, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_ERROR)
		if err != nil {
			return nil, os.NewSyscallError("getsockopt", err)
		}
		switch err := syscall.Errno(nerr); err {
		case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
			// not finished yet: loop and wait again
		case syscall.EISCONN:
			return nil, nil
		case syscall.Errno(0):
			// The runtime poller can wake us up spuriously;
			// see issues 14548 and 19289. Check that we are
			// really connected; if not, wait again.
			if rsa, err := syscall.Getpeername(c.fd); err == nil {
				return rsa, nil
			}
		default:
			return nil, os.NewSyscallError("connect", err)
		}
	}
}

// Various errors contained in OpError.
var (
	// errMissingAddress is the fallback dial error when no address produced an error.
	errMissingAddress = errors.New("missing address")
	// errCanceled is what mapErr returns for context.Canceled.
	errCanceled = errors.New("operation was canceled")
	// errIOTimeout is what mapErr returns for context.DeadlineExceeded.
	errIOTimeout = errors.New("i/o timeout")
)

// mapErr translates the context package's sentinel errors into the
// historical internal net error values: context.Canceled becomes errCanceled
// and context.DeadlineExceeded becomes errIOTimeout. Every other error
// (including nil) is returned unchanged.
//
// TODO(bradfitz): get rid of this after adjusting tests and making
// context.DeadlineExceeded implement net.Error?
func mapErr(err error) error {
	if err == context.Canceled {
		return errCanceled
	}
	if err == context.DeadlineExceeded {
		return errIOTimeout
	}
	return err
}


================================================
FILE: net_netfd_conn.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

import (
	"net"
	"strings"
	"sync/atomic"
	"syscall"
	"time"
)

// compile-time check that *netFD satisfies Conn
var _ Conn = &netFD{}

// Fd implements Conn and reports the raw descriptor.
func (c *netFD) Fd() (fd int) {
	fd = c.fd
	return fd
}

// Read implements Conn.
//
// It performs a single read(2) on the descriptor. EAGAIN and EINTR are
// reported as (0, nil), meaning "nothing read, try again". For any other
// failure the error is returned with n == 0: syscall.Read may report a
// negative count on error, which must not leak to callers that follow the
// io.Reader contract (0 <= n <= len(b)).
func (c *netFD) Read(b []byte) (n int, err error) {
	n, err = syscall.Read(c.fd, b)
	if err != nil {
		n = 0
		if err == syscall.EAGAIN || err == syscall.EINTR {
			err = nil
		}
	}
	return n, err
}

// Write implements Conn.
//
// It performs a single write(2) on the descriptor. EAGAIN and EINTR are
// reported as (0, nil), meaning "nothing written, try again", matching how
// Read treats the same errors. For any other failure the error is returned
// with n == 0: syscall.Write may report a negative count on error, which
// must not leak to callers.
func (c *netFD) Write(b []byte) (n int, err error) {
	n, err = syscall.Write(c.fd, b)
	if err != nil {
		n = 0
		if err == syscall.EAGAIN || err == syscall.EINTR {
			err = nil
		}
	}
	return n, err
}

// Close closes the descriptor at most once: only the call that moves the
// closed counter to 1 proceeds, every other call returns nil.
// The descriptor is left open when the netFD is being detached or when it is
// one of the standard descriptors (fd <= 2). A failing close(2) is logged and
// its error returned.
func (c *netFD) Close() (err error) {
	if first := atomic.AddUint32(&c.closed, 1) == 1; !first {
		return nil
	}
	if c.detaching || c.fd <= 2 {
		return nil
	}
	if err = syscall.Close(c.fd); err != nil {
		logger.Printf("NETPOLL: netFD[%d] close error: %s", c.fd, err.Error())
	}
	return err
}

// LocalAddr implements Conn and reports the recorded local address.
func (c *netFD) LocalAddr() (addr net.Addr) {
	addr = c.localAddr
	return addr
}

// RemoteAddr implements Conn and reports the recorded remote address.
func (c *netFD) RemoteAddr() (addr net.Addr) {
	addr = c.remoteAddr
	return addr
}

// SetKeepAlive implements Conn.
// It is a no-op (returning nil) unless the network name starts with "tcp"
// and second is positive; otherwise it applies the package-level
// SetKeepAlive to the descriptor.
// TODO: only tcp conn is ok.
func (c *netFD) SetKeepAlive(second int) error {
	if second <= 0 || !strings.HasPrefix(c.network, "tcp") {
		return nil
	}
	return SetKeepAlive(c.fd, second)
}

// SetDeadline implements Conn. Deadlines are not supported on a bare netFD,
// so it always returns an ErrUnsupported exception.
func (c *netFD) SetDeadline(t time.Time) error {
	const op = "SetDeadline"
	return Exception(ErrUnsupported, op)
}

// SetReadDeadline implements Conn. Deadlines are not supported on a bare
// netFD, so it always returns an ErrUnsupported exception.
func (c *netFD) SetReadDeadline(t time.Time) error {
	const op = "SetReadDeadline"
	return Exception(ErrUnsupported, op)
}

// SetWriteDeadline implements Conn. Deadlines are not supported on a bare
// netFD, so it always returns an ErrUnsupported exception.
func (c *netFD) SetWriteDeadline(t time.Time) error {
	const op = "SetWriteDeadline"
	return Exception(ErrUnsupported, op)
}


================================================
FILE: net_polldesc.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
)

// newPollDesc allocates an operator for fd from a poller picked by the poll
// manager and wires its OnWrite/OnHup hooks to a fresh pollDesc. The operator
// is only prepared here; registration happens in WaitWrite.
func newPollDesc(fd int) *pollDesc {
	poll := pollmanager.Pick()
	op := poll.Alloc()
	op.poll = poll
	op.FD = fd

	pd := &pollDesc{
		operator:     op,
		writeTrigger: make(chan struct{}),
		closeTrigger: make(chan struct{}),
	}
	op.OnWrite = pd.onwrite
	op.OnHup = pd.onhup
	return pd
}

// pollDesc lets a caller wait for a descriptor to become writable (or to be
// hung up) through netpoll's own poller.
type pollDesc struct {
	// operator is the poller registration for the descriptor.
	operator *FDOperator
	// The write event is OneShot, then mark the writable to skip duplicate calling.
	// writeTrigger is closed by onwrite.
	writeTrigger chan struct{}
	// closeTrigger is closed by onhup.
	closeTrigger chan struct{}
}

// WaitWrite blocks until the descriptor is reported writable, the peer hangs
// up, or ctx is done.
//
// It registers the operator with the poller on first use. It returns nil on
// writability, an ErrConnClosed exception when the close trigger has fired
// (even if the write trigger fired as well), and the mapped context error
// when ctx is done, in which case the operator is detached from the poller.
func (pd *pollDesc) WaitWrite(ctx context.Context) (err error) {
	if pd.operator.isUnused() {
		// add ET|Write|Hup
		if err = pd.operator.Control(PollWritable); err != nil {
			logger.Printf("NETPOLL: pollDesc register operator failed: %v", err)
			return err
		}
	}

	select {
	case <-pd.writeTrigger: // triggered by poller
	case <-pd.closeTrigger: // triggered by poller
		// no need to detach, since poller has done it in OnHup.
		return Exception(ErrConnClosed, "by peer")
	case <-ctx.Done(): // triggered by ctx
		// deregister from poller, upper caller function will close fd
		pd.detach()
		return mapErr(ctx.Err())
	}
	// double check close trigger
	select {
	case <-pd.closeTrigger:
		return Exception(ErrConnClosed, "by peer")
	default:
		return nil
	}
}

// onwrite is the operator's OnWrite hook. The first invocation detaches the
// operator from the poller and closes writeTrigger; later invocations find
// the trigger already closed and do nothing. It always returns nil.
func (pd *pollDesc) onwrite(p Poll) error {
	select {
	case <-pd.writeTrigger:
		// already signalled
		return nil
	default:
	}
	pd.detach()
	close(pd.writeTrigger)
	return nil
}

// onhup is the operator's OnHup hook. The first invocation closes
// closeTrigger; later invocations find it already closed and do nothing.
// It always returns nil.
func (pd *pollDesc) onhup(p Poll) error {
	select {
	case <-pd.closeTrigger:
		// already signalled
		return nil
	default:
	}
	close(pd.closeTrigger)
	return nil
}

// detach removes the operator from its poller. A failure is only logged.
func (pd *pollDesc) detach() {
	err := pd.operator.Control(PollDetach)
	if err == nil {
		return
	}
	logger.Printf("NETPOLL: pollDesc detach operator failed: %v", err)
}


================================================
FILE: net_polldesc_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"testing"
	"time"
)

// TestZeroTimer checks that noDeadline is the zero time.Time.
func TestZeroTimer(t *testing.T) {
	MustTrue(t, noDeadline.IsZero())
}

// TestRuntimePoll dials a local TCP listener ten times in a row and closes
// each connection, expecting every dial to succeed.
func TestRuntimePoll(t *testing.T) {
	address := getTestAddress()
	ln, err := CreateListener("tcp", address)
	MustNil(t, err)

	stop := make(chan int, 1)
	defer close(stop)

	// Accept loop: keeps polling the listener until stop is closed, then closes it.
	go func() {
		for {
			select {
			case <-stop:
				err := ln.Close()
				MustNil(t, err)
				return
			default:
			}
			conn, err := ln.Accept()
			if conn == nil && err == nil {
				continue
			}
		}
	}()

	for i := 0; i < 10; i++ {
		conn, err := DialConnection("tcp", address, time.Second)
		MustNil(t, err)
		conn.Close()
	}
}


================================================
FILE: net_sock.go
================================================
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”).
// All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"net"
	"runtime"
	"syscall"
)

// A sockaddr represents a TCP, IP or Unix network endpoint
// address that can be converted into a syscall.Sockaddr.
type sockaddr interface {
	net.Addr

	// family returns the platform-dependent address family
	// identifier.
	family() int

	// isWildcard reports whether the address is a wildcard
	// address.
	isWildcard() bool

	// sockaddr returns the address converted into a syscall
	// sockaddr type that implements syscall.Sockaddr
	// interface. It returns a nil interface when the address is nil.
	sockaddr(family int) (syscall.Sockaddr, error)

	// toLocal maps the zero address to a local system address (127.0.0.1 or ::1).
	// The argument is the network name.
	toLocal(net string) sockaddr
}

// internetSocket picks the address family for the given network and
// addresses and then creates and dials the socket. On aix, openbsd and nacl
// a wildcard remote address is first mapped to the local system address.
// The mode argument is currently not consulted.
func internetSocket(ctx context.Context, net string, laddr, raddr sockaddr, sotype, proto int, mode string) (conn *netFD, err error) {
	switch runtime.GOOS {
	case "aix", "openbsd", "nacl":
		if raddr.isWildcard() {
			raddr = raddr.toLocal(net)
		}
	}
	family, ipv6only := favoriteAddrFamily(net, laddr, raddr)
	conn, err = socket(ctx, net, family, sotype, proto, ipv6only, laddr, raddr)
	return conn, err
}

// favoriteAddrFamily returns the appropriate address family for the
// given network, laddr and raddr, together with a boolean that designates
// the IPV6_V6ONLY option.
//
// The decision is made as follows:
//
//   - A network name ending in '4' (e.g. "tcp4") selects AF_INET.
//
//   - A network name ending in '6' (e.g. "tcp6") selects AF_INET6 with
//     IPV6_V6ONLY=1.
//
//   - Otherwise guess from the addresses: if every non-nil address is
//     AF_INET the result is AF_INET, or else AF_INET6 with IPV6_V6ONLY=0.
//
// Unlike the standard library function this was derived from, there is no
// "mode" argument, so no special handling of wildcard listen addresses
// happens here. An empty network name is treated like a name without a
// family suffix instead of causing an index-out-of-range panic.
//
// Note that the latest DragonFly BSD and OpenBSD kernels allow
// neither "net.inet6.ip6.v6only=1" change nor IPPROTO_IPV6 level
// IPV6_V6ONLY socket option setting.
func favoriteAddrFamily(network string, laddr, raddr sockaddr) (family int, ipv6only bool) {
	if n := len(network); n > 0 {
		switch network[n-1] {
		case '4':
			return syscall.AF_INET, false
		case '6':
			return syscall.AF_INET6, true
		}
	}
	if (laddr == nil || laddr.family() == syscall.AF_INET) &&
		(raddr == nil || raddr.family() == syscall.AF_INET) {
		return syscall.AF_INET, false
	}
	return syscall.AF_INET6, false
}

// socket creates a socket of the requested family and type, applies the
// default socket options to it, wraps the descriptor in a netFD and dials
// with that netFD. On any failure the descriptor is closed and a nil netFD
// is returned together with the error.
func socket(ctx context.Context, net string, family, sotype, proto int, ipv6only bool, laddr, raddr sockaddr) (netfd *netFD, err error) {
	// Create the raw descriptor first.
	fd, err := sysSocket(family, sotype, proto)
	if err != nil {
		return nil, err
	}
	if err = setDefaultSockopts(fd, family, sotype, ipv6only); err != nil {
		// Not wrapped in a netFD yet, so close the raw descriptor directly.
		syscall.Close(fd)
		return nil, err
	}

	netfd = newNetFD(fd, family, sotype, net)
	if err = netfd.dial(ctx, laddr, raddr); err != nil {
		netfd.Close()
		return nil, err
	}
	return netfd, nil
}

// sockaddrToAddr converts a syscall.Sockaddr into the matching net.Addr.
// IPv4 and IPv6 socket addresses become *net.TCPAddr, Unix socket addresses
// become *net.UnixAddr with Net set to "unix". Any other value, including
// nil, yields a nil net.Addr.
func sockaddrToAddr(sa syscall.Sockaddr) net.Addr {
	switch v := sa.(type) {
	case *syscall.SockaddrInet4:
		return &net.TCPAddr{IP: v.Addr[0:], Port: v.Port}
	case *syscall.SockaddrInet6:
		// Translate a non-zero zone id into an interface name; when the
		// lookup fails the zone is left empty.
		zone := ""
		if v.ZoneId != 0 {
			ifi, err := net.InterfaceByIndex(int(v.ZoneId))
			if err == nil {
				zone = ifi.Name
			}
		}
		return &net.TCPAddr{IP: v.Addr[0:], Port: v.Port, Zone: zone}
	case *syscall.SockaddrUnix:
		return &net.UnixAddr{Net: "unix", Name: v.Name}
	}
	return nil
}


================================================
FILE: net_tcpsock.go
================================================
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”).
// All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"net"
	"os"
	"syscall"
)

// TCPAddr represents the address of a TCP end point.
//
// It embeds net.TCPAddr, so the IP, Port and Zone fields are accessed
// directly on a TCPAddr.
type TCPAddr struct {
	net.TCPAddr
}

// isWildcard reports whether a is a wildcard address: a nil receiver, a nil
// IP, or an unspecified IP.
func (a *TCPAddr) isWildcard() bool {
	return a == nil || a.IP == nil || a.IP.IsUnspecified()
}

// opAddr returns a as a net.Addr. A nil receiver yields a nil interface
// value rather than an interface wrapping a nil pointer.
func (a *TCPAddr) opAddr() net.Addr {
	if a != nil {
		return a
	}
	return nil
}

// family returns the address family of a: AF_INET for a nil receiver, for
// an IP of at most net.IPv4len bytes, and for any IP convertible to IPv4;
// AF_INET6 otherwise.
func (a *TCPAddr) family() int {
	switch {
	case a == nil || len(a.IP) <= net.IPv4len:
		return syscall.AF_INET
	case a.IP.To4() != nil:
		return syscall.AF_INET
	default:
		return syscall.AF_INET6
	}
}

// sockaddr converts a into a syscall.Sockaddr of the given family using
// ipToSockaddr. A nil receiver yields a nil Sockaddr and a nil error.
func (a *TCPAddr) sockaddr(family int) (syscall.Sockaddr, error) {
	if a != nil {
		return ipToSockaddr(family, a.IP, a.Port, a.Zone)
	}
	return nil, nil
}

// toLocal returns a new TCPAddr whose IP is the loopback address for
// network (see loopbackIP) and whose Port and Zone are copied from a.
// The receiver must not be nil.
func (a *TCPAddr) toLocal(network string) sockaddr {
	local := new(TCPAddr)
	local.IP, local.Port, local.Zone = loopbackIP(network), a.Port, a.Zone
	return local
}

// loopbackIP returns the loopback address for network: net.IPv6loopback
// when the network name ends in '6', and the 4-byte 127.0.0.1 otherwise
// (including for an empty name).
func loopbackIP(network string) net.IP {
	if n := len(network); n > 0 && network[n-1] == '6' {
		return net.IPv6loopback
	}
	return net.IP{127, 0, 0, 1}
}

// ipToSockaddr converts ip, port and zone into a syscall.Sockaddr of the
// requested address family.
//
// For AF_INET an empty ip is treated as 0.0.0.0 and ip must be representable
// as IPv4; zone is ignored. For AF_INET6 an empty ip or 0.0.0.0 is treated
// as "::", any address convertible to 16 bytes (including IPv4-mapped ones)
// is accepted, and zone is translated into the ZoneId of the result by
// ipv6ZoneID.
//
// A *net.AddrError is returned when ip does not fit the family or when
// family is neither AF_INET nor AF_INET6.
func ipToSockaddr(family int, ip net.IP, port int, zone string) (syscall.Sockaddr, error) {
	switch family {
	case syscall.AF_INET:
		if len(ip) == 0 {
			ip = net.IPv4zero
		}
		ip4 := ip.To4()
		if ip4 == nil {
			return nil, &net.AddrError{Err: "non-IPv4 address", Addr: ip.String()}
		}
		sa := &syscall.SockaddrInet4{Port: port}
		copy(sa.Addr[:], ip4)
		return sa, nil
	case syscall.AF_INET6:
		// In general, an IP wildcard address, which is either
		// "0.0.0.0" or "::", means the entire IP addressing
		// space. For some historical reason, it is used to
		// specify "any available address" on some operations
		// of IP node.
		//
		// When the IP node supports IPv4-mapped IPv6 address,
		// we allow a listener to listen to the wildcard
		// address of both IP addressing spaces by specifying
		// IPv6 wildcard address.
		if len(ip) == 0 || ip.Equal(net.IPv4zero) {
			ip = net.IPv6zero
		}
		// We accept any IPv6 address including IPv4-mapped
		// IPv6 address.
		ip6 := ip.To16()
		if ip6 == nil {
			return nil, &net.AddrError{Err: "non-IPv6 address", Addr: ip.String()}
		}
		// Carry the zone over as a numeric scope id so that scoped
		// addresses such as link-local ones keep their scope.
		sa := &syscall.SockaddrInet6{Port: port, ZoneId: ipv6ZoneID(zone)}
		copy(sa.Addr[:], ip6)
		return sa, nil
	}
	return nil, &net.AddrError{Err: "invalid address family", Addr: ip.String()}
}

// ipv6ZoneID maps an IPv6 zone to a numeric scope id. The zone is first
// looked up as an interface name; if that fails it is parsed as a decimal
// interface index. An empty, unknown or out-of-range zone maps to 0.
//
// The interface lookup is performed on every call with a non-empty zone;
// results are not cached.
func ipv6ZoneID(zone string) uint32 {
	if zone == "" {
		return 0
	}
	if ifi, err := net.InterfaceByName(zone); err == nil {
		return uint32(ifi.Index)
	}
	const maxID = ^uint32(0)
	var id uint32
	for i := 0; i < len(zone); i++ {
		c := zone[i]
		if c < '0' || c > '9' {
			return 0
		}
		digit := uint32(c - '0')
		if id > (maxID-digit)/10 {
			// Would overflow uint32; not a usable index.
			return 0
		}
		id = id*10 + digit
	}
	return id
}

// ResolveTCPAddr returns an address of TCP end point.
//
// The network must be a TCP network name.
//
// Resolution is delegated to net.ResolveTCPAddr and the result is wrapped
// in a TCPAddr: a literal IP address and port number are parsed as such,
// anything else is resolved. Passing a host name is possible but not
// recommended, because at most one of the host name's IP addresses is
// returned.
//
// See func Dial for a description of the network and address
// parameters.
func ResolveTCPAddr(network, address string) (*TCPAddr, error) {
	resolved, err := net.ResolveTCPAddr(network, address)
	if err != nil {
		return nil, err
	}
	return &TCPAddr{TCPAddr: *resolved}, nil
}

// TCPConnection implements Connection.
// It embeds the package's connection type; instances are created by
// newTCPConnection.
type TCPConnection struct {
	connection
}

// newTCPConnection builds a TCPConnection around conn by initializing the
// embedded connection with no options. If initialization fails, the error
// is returned and the connection is nil.
func newTCPConnection(conn Conn) (connection *TCPConnection, err error) {
	connection = new(TCPConnection)
	if err = connection.init(conn, nil); err != nil {
		return nil, err
	}
	return connection, nil
}

// DialTCP acts like Dial for TCP networks.
//
// The network must be a TCP network name ("tcp", "tcp4" or "tcp6"); see
// func Dial for details.
//
// If laddr is nil, a local address is automatically chosen.
// If the IP field of raddr is nil or an unspecified IP address, the
// local system is assumed. A nil raddr is rejected with a missing-address
// error, and a nil ctx is replaced by context.Background().
//
// Every failure is reported as a *net.OpError with Op "dial".
func DialTCP(ctx context.Context, network string, laddr, raddr *TCPAddr) (*TCPConnection, error) {
	// dialErr wraps cause into the *net.OpError shape shared by all
	// failure paths below.
	dialErr := func(addr net.Addr, cause error) error {
		return &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: addr, Err: cause}
	}

	if network != "tcp" && network != "tcp4" && network != "tcp6" {
		return nil, dialErr(raddr.opAddr(), net.UnknownNetworkError(network))
	}
	if raddr == nil {
		return nil, dialErr(nil, errMissingAddress)
	}
	if ctx == nil {
		ctx = context.Background()
	}

	dialer := &sysDialer{network: network, address: raddr.String()}
	c, err := dialer.dialTCP(ctx, laddr, raddr)
	if err != nil {
		return nil, dialErr(raddr.opAddr(), err)
	}
	return c, nil
}

// dialTCP opens a stream socket from laddr to raddr and wraps it in a
// TCPConnection. When the kernel picks the local port, a self-connection or
// a spurious EADDRNOTAVAIL is retried up to two more times.
func (sd *sysDialer) dialTCP(ctx context.Context, laddr, raddr *TCPAddr) (*TCPConnection, error) {
	open := func() (*netFD, error) {
		return internetSocket(ctx, sd.network, laddr, raddr, syscall.SOCK_STREAM, 0, "dial")
	}
	conn, err := open()

	// TCP has a rarely used mechanism called a 'simultaneous connection' in
	// which Dial("tcp", addr1, addr2) run on the machine at addr1 can
	// connect to a simultaneous Dial("tcp", addr2, addr1) run on the machine
	// at addr2, without either machine executing Listen. If laddr == nil,
	// we want the kernel to pick an appropriate originating local address.
	// Some Linux kernels cycle blindly through a fixed range of local
	// ports, regardless of destination port. If a kernel happens to pick
	// local port 50001 as the source for a Dial("tcp", "", "localhost:50001"),
	// then the Dial will succeed, having simultaneously connected to itself.
	// This can only happen when we are letting the kernel pick a port
	// (laddr == nil) and when there is no listener for the destination
	// address. It's hard to argue this is anything other than a kernel bug.
	// Rather than expose the buggy effect to users, we close the conn and
	// try again. If it happens twice more, we relent and use the result.
	// See also:
	// 	https://golang.org/issue/2690
	// 	https://stackoverflow.com/questions/4949858/
	//
	// The opposite can also happen: if we ask the kernel to pick an
	// appropriate originating local address, sometimes it picks one that is
	// already in use. So if the error is EADDRNOTAVAIL, we have to try again
	// too, just for a different reason.
	kernelPicksPort := laddr == nil || laddr.Port == 0
	for retry := 0; retry < 2 && kernelPicksPort && (selfConnect(conn, err) || spuriousENOTAVAIL(err)); retry++ {
		// A self-connected socket was opened successfully and must be
		// released before the next attempt.
		if err == nil {
			conn.Close()
		}
		conn, err = open()
	}

	if err != nil {
		return nil, err
	}
	return newTCPConnection(conn)
}

// selfConnect reports whether a dial that returned (conn, err) should be
// treated as having connected to itself. A failed dial never counts. A conn
// with a missing local or remote address counts, so that the caller retries.
// Otherwise the result is true when both ends have the same port and IP.
func selfConnect(conn *netFD, err error) bool {
	switch {
	case err != nil:
		// If the connect failed, we clearly didn't connect to ourselves.
		return false
	case conn.localAddr == nil || conn.remoteAddr == nil:
		// The socket constructor can return a conn with raddr nil under
		// certain unknown conditions. The errors in the calls there to
		// Getpeername are discarded, but we can't catch the problem there
		// because those calls are sometimes legally erroneous with a
		// "socket not connected". Since this code is already working around
		// a problem, recognize the trouble here and ask the DialTCP routine
		// to try again.
		// TODO: try to understand what's really going on.
		return true
	}
	local, remote := conn.localAddr.(*net.TCPAddr), conn.remoteAddr.(*net.TCPAddr)
	return local.Port == remote.Port && local.IP.Equal(remote.IP)
}

// spuriousENOTAVAIL reports whether err is syscall.EADDRNOTAVAIL, looking
// through at most one *net.OpError and then at most one *os.SyscallError
// wrapper.
func spuriousENOTAVAIL(err error) bool {
	cause := err
	if opErr, isOp := cause.(*net.OpError); isOp {
		cause = opErr.Err
	}
	if sysErr, isSys := cause.(*os.SyscallError); isSys {
		cause = sysErr.Err
	}
	return cause == syscall.EADDRNOTAVAIL
}


================================================
FILE: net_unixsock.go
================================================
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”).
// All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"errors"
	"net"
	"syscall"
)

// BUG(mikio): On JS, NaCl and Plan 9, methods and functions related
// to UnixConn and UnixListener are not implemented.

// BUG(mikio): On Windows, methods and functions related to UnixConn
// and UnixListener don't work for "unixgram" and "unixpacket".

// UnixAddr represents the address of a Unix domain socket end point.
//
// It embeds net.UnixAddr, so the Name field is accessed directly on a
// UnixAddr.
type UnixAddr struct {
	net.UnixAddr
}

// isWildcard reports whether a is a wildcard address: a nil receiver or an
// empty Name.
func (a *UnixAddr) isWildcard() bool {
	if a == nil {
		return true
	}
	return a.Name == ""
}

// opAddr returns a as a net.Addr. A nil receiver yields a nil interface
// value rather than an interface wrapping a nil pointer.
func (a *UnixAddr) opAddr() net.Addr {
	if a != nil {
		return a
	}
	return nil
}

// family returns syscall.AF_UNIX. The receiver is not inspected, so the
// result is the same for a nil receiver.
func (a *UnixAddr) family() int {
	return syscall.AF_UNIX
}

// sockaddr converts a into a *syscall.SockaddrUnix carrying a.Name. The
// family argument is not used. A nil receiver yields a nil Sockaddr and a
// nil error.
func (a *UnixAddr) sockaddr(family int) (syscall.Sockaddr, error) {
	if a != nil {
		return &syscall.SockaddrUnix{Name: a.Name}, nil
	}
	return nil, nil
}

// toLocal returns the receiver unchanged; the network argument is not used.
func (a *UnixAddr) toLocal(net string) sockaddr {
	return a
}

// ResolveUnixAddr returns an address of Unix domain socket end point.
//
// The network must be a Unix network name. Resolution is delegated to
// net.ResolveUnixAddr and the result is wrapped in a UnixAddr.
//
// See func Dial for a description of the network and address
// parameters.
func ResolveUnixAddr(network, address string) (*UnixAddr, error) {
	resolved, err := net.ResolveUnixAddr(network, address)
	if err != nil {
		return nil, err
	}
	return &UnixAddr{UnixAddr: *resolved}, nil
}

// UnixConnection implements Connection.
// It embeds the package's connection type; instances are created by
// newUnixConnection.
type UnixConnection struct {
	connection
}

// newUnixConnection builds a UnixConnection around conn by initializing the
// embedded connection with no options. If initialization fails, the error
// is returned and the connection is nil.
func newUnixConnection(conn Conn) (connection *UnixConnection, err error) {
	connection = new(UnixConnection)
	if err = connection.init(conn, nil); err != nil {
		return nil, err
	}
	return connection, nil
}

// DialUnix acts like Dial for Unix networks.
//
// The network must be a Unix network name ("unix", "unixgram" or
// "unixpacket"); see func Dial for details.
//
// If laddr is non-nil, it is used as the local address for the
// connection. raddr may be nil; whether that is acceptable is decided by
// the dial itself, not here.
//
// Every failure is reported as a *net.OpError with Op "dial".
func DialUnix(network string, laddr, raddr *UnixAddr) (*UnixConnection, error) {
	switch network {
	case "unix", "unixgram", "unixpacket":
	default:
		return nil, &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: raddr.opAddr(), Err: net.UnknownNetworkError(network)}
	}
	// String is promoted from the embedded net.UnixAddr, so calling it
	// through a nil *UnixAddr dereferences the nil pointer. Spell out the
	// nil case with the text net.UnixAddr itself uses for a nil address.
	address := "<nil>"
	if raddr != nil {
		address = raddr.String()
	}
	sd := &sysDialer{network: network, address: address}
	c, err := sd.dialUnix(context.Background(), laddr, raddr)
	if err != nil {
		return nil, &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: raddr.opAddr(), Err: err}
	}
	return c, nil
}

// dialUnix opens a Unix domain socket in "dial" mode on the dialer's
// network and wraps it in a UnixConnection.
func (sd *sysDialer) dialUnix(ctx context.Context, laddr, raddr *UnixAddr) (*UnixConnection, error) {
	fd, err := unixSocket(ctx, sd.network, laddr, raddr, "dial")
	if err == nil {
		return newUnixConnection(fd)
	}
	return nil, err
}

// unixSocket creates an AF_UNIX socket for network ("unix", "unixgram" or
// "unixpacket") in the given mode ("dial" or "listen").
//
// In "dial" mode wildcard addresses are treated as absent, and a missing
// remote address is only accepted for a datagram socket that has a local
// address; otherwise errMissingAddress is returned. An unknown network
// yields net.UnknownNetworkError and an unknown mode an "unknown mode"
// error.
func unixSocket(ctx context.Context, network string, laddr, raddr sockaddr, mode string) (conn *netFD, err error) {
	var sotype int
	switch network {
	case "unix":
		sotype = syscall.SOCK_STREAM
	case "unixgram":
		sotype = syscall.SOCK_DGRAM
	case "unixpacket":
		sotype = syscall.SOCK_SEQPACKET
	default:
		return nil, net.UnknownNetworkError(network)
	}

	if mode != "dial" && mode != "listen" {
		return nil, errors.New("unknown mode: " + mode)
	}
	if mode == "dial" {
		// Wildcard addresses count as absent.
		if laddr != nil && laddr.isWildcard() {
			laddr = nil
		}
		if raddr != nil && raddr.isWildcard() {
			raddr = nil
		}
		// Only a datagram socket with a local address may go without a peer.
		peerOptional := sotype == syscall.SOCK_DGRAM && laddr != nil
		if raddr == nil && !peerOptional {
			return nil, errMissingAddress
		}
	}

	return socket(ctx, network, syscall.AF_UNIX, sotype, 0, false, laddr, raddr)
}


================================================
FILE: netpoll_config.go
================================================
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"context"
	"io"
)

// global config
var (
	// defaultLinkBufferSize starts at pagesize and is overwritten by
	// Configure when Config.BufferSize is positive.
	defaultLinkBufferSize = pagesize
)

// Config exposes tuning parameters that control the internal behavior of netpoll.
// A parameter left at its zero value is meant to keep netpoll's default behavior.
//
// NOTE(review): Configure applies LoadBalance whenever it is >= 0, which
// includes the zero value — confirm that the zero LoadBalance is the default.
type Config struct {
	PollerNum    int                                 // number of pollers
	BufferSize   int                                 // default size of a new connection's LinkBuffer
	Runner       func(ctx context.Context, f func()) // runner for event handler, most of the time use a goroutine pool.
	LoggerOutput io.Writer                           // logger output
	LoadBalance  LoadBalance                         // load balance for poller picker
	Feature                                          // define all features that not enable by default
}

// Feature exposes new features that may be promoted to default behavior
// later but are not enabled by default yet. It is embedded in Config.
type Feature struct {
	// Deprecated: AlwaysNoCopyRead has no effect and will be removed in a future release.
	AlwaysNoCopyRead bool
}


================================================
FILE: netpoll_options.go
================================================
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import "time"

// Option configures an EventLoop. Values are produced by the With*
// constructors in this file and consumed by NewEventLoop.
type Option struct {
	// f applies this option to the options being built.
	f func(*options)
}

// options collects the callbacks and timeouts configured through Option
// values. Each field is set by the With* constructor of the same name;
// onRequest is set by NewEventLoop.
type options struct {
	onPrepare    OnPrepare
	onConnect    OnConnect
	onDisconnect OnDisconnect
	onRequest    OnRequest
	readTimeout  time.Duration
	writeTimeout time.Duration
	idleTimeout  time.Duration
}

// WithOnPrepare returns an Option that registers onPrepare as the
// EventLoop's OnPrepare callback.
func WithOnPrepare(onPrepare OnPrepare) Option {
	apply := func(o *options) {
		o.onPrepare = onPrepare
	}
	return Option{f: apply}
}

// WithOnConnect returns an Option that registers onConnect as the
// EventLoop's OnConnect callback.
func WithOnConnect(onConnect OnConnect) Option {
	apply := func(o *options) {
		o.onConnect = onConnect
	}
	return Option{f: apply}
}

// WithOnDisconnect returns an Option that registers onDisconnect as the
// EventLoop's OnDisconnect callback.
func WithOnDisconnect(onDisconnect OnDisconnect) Option {
	apply := func(o *options) {
		o.onDisconnect = onDisconnect
	}
	return Option{f: apply}
}

// WithReadTimeout returns an Option that sets the read timeout of
// connections.
func WithReadTimeout(timeout time.Duration) Option {
	apply := func(o *options) {
		o.readTimeout = timeout
	}
	return Option{f: apply}
}

// WithWriteTimeout returns an Option that sets the write timeout of
// connections.
func WithWriteTimeout(timeout time.Duration) Option {
	apply := func(o *options) {
		o.writeTimeout = timeout
	}
	return Option{f: apply}
}

// WithIdleTimeout returns an Option that sets the idle timeout of
// connections.
func WithIdleTimeout(timeout time.Duration) Option {
	apply := func(o *options) {
		o.idleTimeout = timeout
	}
	return Option{f: apply}
}


================================================
FILE: netpoll_server.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"errors"
	"strings"
	"sync"
	"syscall"
	"time"
)

// newServer wraps ln into a server that uses opts. onQuit is stored on the
// server and is called with the cause when the server stops (see Run,
// OnRead and OnHup).
func newServer(ln Listener, opts *options, onQuit func(err error)) *server {
	svr := new(server)
	svr.ln, svr.opts, svr.onQuit = ln, opts, onQuit
	return svr
}

// server accepts connections from a listener through a poller and keeps
// track of the connections it accepted.
type server struct {
	operator    FDOperator      // poller registration of the listener fd, set up in Run
	ln          Listener        // listener that connections are accepted from
	opts        *options        // options passed to every accepted connection
	onQuit      func(err error) // called with the cause when the server stops
	connections sync.Map // key=fd, value=connection
}

// Run registers the listener's fd with a poller picked from pollmanager so
// that readability triggers OnRead and hang-up triggers OnHup. If the
// registration fails, onQuit is called with the error, which is also
// returned.
func (s *server) Run() (err error) {
	s.operator = FDOperator{FD: s.ln.Fd(), OnRead: s.OnRead, OnHup: s.OnHup}
	s.operator.poll = pollmanager.Pick()
	if err = s.operator.Control(PollReadable); err != nil {
		s.onQuit(err)
	}
	return err
}

// Close stops accepting and closes the server's connections, bounded by
// ctx.
//
// The listener is detached from its poller and closed first. Connections
// are then swept repeatedly: a connection that is idle, or that does not
// implement gracefulExit, is closed; any other connection counts as
// active. Close returns nil once a sweep finds no active connection, or
// ctx.Err() if ctx is done while waiting for the next sweep.
func (s *server) Close(ctx context.Context) error {
	const (
		minWait = 50 * time.Millisecond
		maxWait = time.Second
	)

	s.operator.Control(PollDetach)
	s.ln.Close()

	for {
		var active int
		s.connections.Range(func(_, value interface{}) bool {
			if c, graceful := value.(gracefulExit); graceful && !c.isIdle() {
				active++
				return true
			}
			value.(Connection).Close()
			return true
		})
		if active == 0 { // all connections have been closed
			return nil
		}

		// The check interval scales with the number of active connections:
		// one millisecond per connection, clamped to [minWait, maxWait].
		wait := time.Duration(active) * time.Millisecond
		switch {
		case wait > maxWait:
			wait = maxWait
		case wait < minWait:
			wait = minWait
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(wait):
		}
	}
}

// OnRead implements FDOperator.
//
// It accepts one connection from the listener and hands it to onAccept.
// A nil connection with a nil error is treated as "nothing to accept" and
// is not an error.
//
// On an accept error the error is logged and returned. In addition:
//   - if the error means the process is out of file descriptors, the
//     listener is detached from the poller and a background goroutine keeps
//     retrying Accept with a growing delay until the backlog is drained,
//     then re-registers the listener;
//   - if the error text contains "closed", the listener is detached and
//     onQuit is called with the error.
func (s *server) OnRead(p Poll) error {
	// accept socket
	conn, err := s.ln.Accept()
	if err == nil {
		if conn != nil {
			s.onAccept(conn.(Conn))
		}
		// EAGAIN | EWOULDBLOCK if conn and err both nil
		return nil
	}
	logger.Printf("NETPOLL: accept conn failed: %v", err)

	// delay accept when too many open files
	if isOutOfFdErr(err) {
		// since we use Epoll LT, we have to detach listener fd from epoll first
		// and re-register it when accept successfully or there is no available connection
		cerr := s.operator.Control(PollDetach)
		if cerr != nil {
			logger.Printf("NETPOLL: detach listener fd failed: %v", cerr)
			return err
		}
		// NOTE(review): this goroutine only exits once Accept reports that
		// nothing is pending; if Accept keeps failing (for example after the
		// listener is closed) it retries forever at the longest delay.
		go func() {
			retryTimes := []time.Duration{0, 10, 50, 100, 200, 500, 1000} // ms
			retryTimeIndex := 0
			for {
				// index 0 means "retry immediately"; after a failure the
				// index advances and stays at the last entry.
				if retryTimeIndex > 0 {
					time.Sleep(retryTimes[retryTimeIndex] * time.Millisecond)
				}
				conn, err := s.ln.Accept()
				if err == nil {
					if conn == nil {
						// recovery accept poll loop
						// NOTE(review): the error of this re-registration is
						// dropped; on failure the listener stays detached.
						s.operator.Control(PollReadable)
						return
					}
					s.onAccept(conn.(Conn))
					logger.Println("NETPOLL: re-accept conn success:", conn.RemoteAddr())
					// a success resets the back-off
					retryTimeIndex = 0
					continue
				}
				if retryTimeIndex+1 < len(retryTimes) {
					retryTimeIndex++
				}
				logger.Printf("NETPOLL: re-accept conn failed, err=[%s] and next retrytime=%dms", err.Error(), retryTimes[retryTimeIndex])
			}
		}()
	}

	// shut down
	if strings.Contains(err.Error(), "closed") {
		s.operator.Control(PollDetach)
		s.onQuit(err)
		return err
	}

	return err
}

// OnHup implements FDOperator. It reports a "listener close" error through
// onQuit and always returns nil.
func (s *server) OnHup(p Poll) error {
	hupErr := errors.New("listener close")
	s.onQuit(hupErr)
	return nil
}

// onAccept turns an accepted conn into a connection initialized with the
// server's options, records it in s.connections under its fd, and triggers
// its onConnect handling. A connection that is not active after init is
// dropped without being recorded.
func (s *server) onAccept(conn Conn) {
	// store & register connection
	nconn := new(connection)
	// NOTE(review): the error returned by init is not checked here; only
	// IsActive decides whether the connection is kept.
	nconn.init(conn, s.opts)
	if !nconn.IsActive() {
		return
	}
	fd := conn.Fd()
	// remove the map entry again when the connection is closed
	nconn.AddCloseCallback(func(connection Connection) error {
		s.connections.Delete(fd)
		return nil
	})
	s.connections.Store(fd, nconn)

	// trigger onConnect asynchronously
	nconn.onConnect()
}

// isOutOfFdErr reports whether err means that the process (EMFILE) or the
// system (ENFILE) has run out of file descriptors. Wrapped errors are
// unwrapped, so an errno carried inside an *os.SyscallError or
// *net.OpError is recognized as well. A nil err yields false.
func isOutOfFdErr(err error) bool {
	return errors.Is(err, syscall.EMFILE) || errors.Is(err, syscall.ENFILE)
}


================================================
FILE: netpoll_unix.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

import (
	"context"
	"io"
	"log"
	"net"
	"os"
	"runtime"
	"sync"

	"github.com/cloudwego/netpoll/internal/runner"
)

// Package-level state shared by all event loops and connections.
var (
	pollmanager = newManager(runtime.GOMAXPROCS(0)/20 + 1) // pollmanager manage all pollers
	// logger writes to os.Stderr until replaced by Configure or
	// SetLoggerOutput.
	logger      = log.New(os.Stderr, "", log.LstdFlags)
)

// Initialize sets up the pollers eagerly. Without it they are initialized
// lazily. It is safe to call it multiple times.
func Initialize() {
	// The first Pick initializes the pollers; its result is not needed.
	pollmanager.Pick()
}

// Configure applies config to the internal behavior of netpoll.
// Configure must be called in an init() function, because the pollers read
// some global variables after init() has finished.
//
// Fields are applied in this order, each only when set: PollerNum (> 0),
// BufferSize (> 0), Runner (non-nil), LoggerOutput (non-nil) and
// LoadBalance (>= 0). The first error stops processing and is returned;
// settings applied before it stay in effect.
func Configure(config Config) (err error) {
	if n := config.PollerNum; n > 0 {
		if err = pollmanager.SetNumLoops(n); err != nil {
			return err
		}
	}
	if size := config.BufferSize; size > 0 {
		defaultLinkBufferSize = size
	}
	if run := config.Runner; run != nil {
		runner.RunTask = run
	}
	if out := config.LoggerOutput; out != nil {
		logger = log.New(out, "", log.LstdFlags)
	}
	if lb := config.LoadBalance; lb >= 0 {
		if err = pollmanager.SetLoadBalance(lb); err != nil {
			return err
		}
	}
	return nil
}

// SetNumLoops sets the number of pollers; there is generally no need to set it actively.
// By default, the number of pollers is equal to runtime.GOMAXPROCS(0)/20+1.
// If the number of cores in your service process is less than 20c, theoretically only one poller is needed.
// Otherwise, you may need to adjust the number of pollers to achieve the best results.
// Experience recommends assigning a poller every 20c.
//
// You can only use SetNumLoops before any connection is created. An example usage:
//
//	func init() {
//	    netpoll.SetNumLoops(...)
//	}
//
// Deprecated: use Configure instead.
func SetNumLoops(numLoops int) error {
	return pollmanager.SetNumLoops(numLoops)
}

// SetLoadBalance sets the load balancing method. Load balancing is always a best effort to attempt
// to distribute the incoming connections between multiple polls.
// This option only works when numLoops is set.
//
// Deprecated: use Configure instead.
func SetLoadBalance(lb LoadBalance) error {
	return pollmanager.SetLoadBalance(lb)
}

// SetLoggerOutput sets the logger output target by replacing the package
// logger with one that writes to w.
//
// Deprecated: use Configure instead.
func SetLoggerOutput(w io.Writer) {
	logger = log.New(w, "", log.LstdFlags)
}

// SetRunner sets the runner function used for every OnRequest/OnConnect callback.
//
// Deprecated: use Configure and specify config.Runner instead.
func SetRunner(f func(ctx context.Context, f func())) {
	runner.RunTask = f
}

// DisableGopool will remove gopool(the goroutine pool used to run OnRequest),
// which means that OnRequest will be run via `go OnRequest(...)`.
// Usually, OnRequest will cause stack expansion, which can be solved by reusing goroutine.
// But if you can confirm that the OnRequest will not cause stack expansion,
// it is recommended to use DisableGopool to reduce redundancy and improve performance.
//
// The returned error is always nil.
//
// Deprecated: use Configure() and specify config.Runner instead.
func DisableGopool() error {
	runner.UseGoRunTask()
	return nil
}

// NewEventLoop creates an EventLoop whose options hold onRequest and
// whatever the given ops set, applied in order. A zero Option carries no
// setter and is skipped. The returned error is always nil.
func NewEventLoop(onRequest OnRequest, ops ...Option) (EventLoop, error) {
	opts := &options{
		onRequest: onRequest,
	}
	for _, op := range ops {
		// Option{} can be constructed by callers even though its field is
		// unexported; calling its nil func would panic.
		if op.f == nil {
			continue
		}
		op.f(opts)
	}
	return &eventLoop{
		opts: opts,
		// Buffered so that the first quit can be recorded without a
		// receiver waiting.
		stop: make(chan error, 1),
	}, nil
}

// eventLoop is the EventLoop returned by NewEventLoop.
// The embedded mutex guards svr.
type eventLoop struct {
	sync.Mutex
	opts *options   // options handed to the server created by Serve
	svr  *server    // set by Serve, cleared by Shutdown
	stop chan error // receives the result that Serve returns
}

// Serve implements EventLoop.
//
// It converts ln with ConvertListener, starts a server on it and then blocks
// until a quit signal arrives, returning that signal's error.
func (evl *eventLoop) Serve(ln net.Listener) error {
	npln, err := ConvertListener(ln)
	if err != nil {
		return err
	}
	evl.Lock()
	evl.svr = newServer(npln, evl.opts, evl.quit)
	// The error from Run is not checked here: on failure Run passes it to
	// evl.quit, so it is delivered through waitQuit below.
	evl.svr.Run()
	evl.Unlock()

	err = evl.waitQuit()
	// ensure evl will not be finalized until Serve returns
	runtime.SetFinalizer(evl, nil)
	return err
}

// Shutdown signals a shutdown and begins closing the server.
//
// The current server is taken out of the event loop under the lock, so
// only one caller gets to close it. If there is a server, a nil quit signal
// is sent and the result of the server's Close(ctx) is returned. Without a
// server Shutdown returns nil.
func (evl *eventLoop) Shutdown(ctx context.Context) error {
	evl.Lock()
	svr := evl.svr
	evl.svr = nil
	evl.Unlock()

	if svr != nil {
		evl.quit(nil)
		return svr.Close(ctx)
	}
	return nil
}

// waitQuit blocks until a quit signal is received on evl.stop and returns
// the error carried by it (nil for a plain shutdown).
func (evl *eventLoop) waitQuit() error {
	return <-evl.stop
}

// quit sends err on evl.stop without blocking. If the channel cannot accept
// the value right now, err is dropped.
func (evl *eventLoop) quit(err error) {
	select {
	case evl.stop <- err:
	default:
	}
}


================================================
FILE: netpoll_unix_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"errors"
	"fmt"
	"os"
	"runtime"
	"sync"
	"sync/atomic"
	"syscall"
	"testing"
	"time"

	"github.com/cloudwego/netpoll/internal/runner"
)

// MustNil fails the test immediately unless val is a nil interface value.
func MustNil(t *testing.T, val interface{}) {
	t.Helper()
	Assert(t, val == nil, val)
	if val == nil {
		return
	}
	t.Fatal("assertion nil failed, val=", val)
}

// MustTrue fails the test immediately unless cond is true.
func MustTrue(t *testing.T, cond bool) {
	t.Helper()
	if cond {
		return
	}
	t.Fatal("assertion true failed.")
}

// Equal fails the test immediately unless got == expect, compared as
// interface values.
func Equal(t *testing.T, got, expect interface{}) {
	t.Helper()
	if got == expect {
		return
	}
	t.Fatalf("assertion equal failed, got=[%v], expect=[%v]", got, expect)
}

// Assert fails the test immediately unless cond is true. Any val arguments
// are appended to the failure message after "assertion failed:".
func Assert(t *testing.T, cond bool, val ...interface{}) {
	t.Helper()
	if cond {
		return
	}
	if len(val) == 0 {
		t.Fatal("assertion failed")
	} else {
		args := make([]interface{}, 0, len(val)+1)
		args = append(args, "assertion failed:")
		args = append(args, val...)
		t.Fatal(args...)
	}
}

// testPort is the last port handed out by getTestAddress.
var testPort int32 = 10000

// getTestAddress returns a loopback address with a port that is unique per
// call, so that tests do not share a listener.
func getTestAddress() string {
	port := atomic.AddInt32(&testPort, 1)
	return fmt.Sprintf("127.0.0.1:%d", port)
}

// TestEqual checks that every assertion helper accepts a nil error value.
func TestEqual(t *testing.T) {
	var err error
	MustNil(t, err)
	MustTrue(t, err == nil)
	Equal(t, err, nil)
	Assert(t, err == nil, err)
}

// TestOnConnect runs a ping/pong exchange that is served entirely from the
// OnConnect callback: the client sends "ping" 1024 times and expects
// "pong" each time, then closes the connection and shuts the loop down.
func TestOnConnect(t *testing.T) {
	network, address := "tcp", getTestAddress()
	req, resp := "ping", "pong"
	loop := newTestEventLoop(network, address,
		// OnRequest does nothing; all traffic is handled in OnConnect.
		func(ctx context.Context, connection Connection) error {
			return nil
		},
		WithOnConnect(func(ctx context.Context, conn Connection) context.Context {
			// Answer every request until the connection reports EOF or
			// that it was closed.
			for {
				input, err := conn.Reader().Next(len(req))
				if errors.Is(err, ErrEOF) || errors.Is(err, ErrConnClosed) {
					return ctx
				}
				MustNil(t, err)
				Equal(t, string(input), req)

				_, err = conn.Writer().WriteString(resp)
				MustNil(t, err)
				err = conn.Writer().Flush()
				MustNil(t, err)
			}
		}),
	)
	conn, err := DialConnection(network, address, time.Second)
	MustNil(t, err)

	for i := 0; i < 1024; i++ {
		_, err = conn.Writer().WriteString(req)
		MustNil(t, err)
		err = conn.Writer().Flush()
		MustNil(t, err)

		input, err := conn.Reader().Next(len(resp))
		MustNil(t, err)
		Equal(t, string(input), resp)
	}

	err = conn.Close()
	MustNil(t, err)

	err = loop.Shutdown(context.Background())
	MustNil(t, err)
}

// TestOnConnectWrite checks that data written from the OnConnect callback
// reaches the client: the server writes "hello" on connect and the client
// reads exactly those five bytes.
func TestOnConnectWrite(t *testing.T) {
	network, address := "tcp", getTestAddress()
	loop := newTestEventLoop(network, address,
		// OnRequest does nothing; the server only writes in OnConnect.
		func(ctx context.Context, connection Connection) error {
			return nil
		},
		WithOnConnect(func(ctx context.Context, connection Connection) context.Context {
			_, err := connection.Write([]byte("hello"))
			MustNil(t, err)
			return ctx
		}),
	)
	conn, err := DialConnection(network, address, time.Second)
	MustNil(t, err)
	s, err := conn.Reader().ReadString(5)
	MustNil(t, err)
	MustTrue(t, s == "hello")

	// NOTE(review): conn is not closed explicitly before the shutdown.
	err = loop.Shutdown(context.Background())
	MustNil(t, err)
}

// TestOnDisconnect opens and closes a batch of client connections and checks that,
// for each of them, the close callback registered in OnConnect fires and the
// OnRequest handler observes the context cancellation issued by OnDisconnect.
func TestOnDisconnect(t *testing.T) {
	type ctxKey struct{}
	network, address := "tcp", getTestAddress()
	// canceled counts handlers that saw ctx.Done(); closed counts close callbacks.
	var canceled, closed int32
	var conns int32 = 100
	req := "ping"
	loop := newTestEventLoop(network, address,
		func(ctx context.Context, connection Connection) error {
			// The cancel func was stored in the context by OnConnect below.
			cancelFunc, _ := ctx.Value(ctxKey{}).(context.CancelFunc)
			MustTrue(t, cancelFunc != nil)
			Assert(t, ctx.Done() != nil)

			buf, err := connection.Reader().Next(4) // should consume all data
			MustNil(t, err)
			Equal(t, string(buf), req)
			// Wait for OnDisconnect to cancel the context; give up after one second
			// so that a missing cancellation shows up as a count mismatch below.
			select {
			case <-ctx.Done():
				atomic.AddInt32(&canceled, 1)
			case <-time.After(time.Second):
			}
			return nil
		},
		WithOnConnect(func(ctx context.Context, conn Connection) context.Context {
			conn.AddCloseCallback(func(connection Connection) error {
				atomic.AddInt32(&closed, 1)
				return nil
			})
			// Attach a cancelable context and stash its cancel func for OnDisconnect.
			ctx, cancel := context.WithCancel(ctx)
			return context.WithValue(ctx, ctxKey{}, cancel)
		}),
		WithOnDisconnect(func(ctx context.Context, conn Connection) {
			cancelFunc, _ := ctx.Value(ctxKey{}).(context.CancelFunc)
			MustTrue(t, cancelFunc != nil)
			cancelFunc()
		}),
	)

	// Each client sends one request and closes immediately afterwards.
	for i := int32(0); i < conns; i++ {
		conn, err := DialConnection(network, address, time.Second)
		MustNil(t, err)

		_, err = conn.Writer().WriteString(req)
		MustNil(t, err)
		err = conn.Writer().Flush()
		MustNil(t, err)

		err = conn.Close()
		MustNil(t, err)
	}
	// Spin until every server-side close callback has run.
	for atomic.LoadInt32(&closed) < conns {
		t.Logf("closed: %d, canceled: %d", atomic.LoadInt32(&closed), atomic.LoadInt32(&canceled))
		runtime.Gosched()
	}
	Equal(t, atomic.LoadInt32(&closed), conns)
	Equal(t, atomic.LoadInt32(&canceled), conns)

	err := loop.Shutdown(context.Background())
	MustNil(t, err)
}

// TestOnDisconnectWhenOnConnect closes client connections right after dialing and
// checks the callback ordering per connection: OnPrepare, then OnConnect, then
// OnDisconnect, with OnDisconnect receiving the context returned by OnConnect.
func TestOnDisconnectWhenOnConnect(t *testing.T) {
	type ctxPrepareKey struct{}
	type ctxConnectKey struct{}
	network, address := "tcp", getTestAddress()
	var conns int32 = 10
	var wg sync.WaitGroup
	// Three callbacks (prepare, connect, disconnect) are expected per connection.
	wg.Add(int(conns) * 3)
	loop := newTestEventLoop(network, address,
		func(ctx context.Context, connection Connection) error {
			_, _ = connection.Reader().Next(connection.Reader().Len())
			return nil
		},
		WithOnPrepare(func(connection Connection) context.Context {
			defer wg.Done()
			// Per-connection state counter: 0 = prepared, 1 = connected, 2 = disconnected.
			var counter int32
			return context.WithValue(context.Background(), ctxPrepareKey{}, &counter)
		}),
		WithOnConnect(func(ctx context.Context, conn Connection) context.Context {
			defer wg.Done()
			t.Logf("OnConnect: %v", conn.RemoteAddr())
			time.Sleep(time.Millisecond * 10) // wait for closed called
			counter := ctx.Value(ctxPrepareKey{}).(*int32)
			// 0 -> 1 only succeeds if OnDisconnect has not run yet.
			ok := atomic.CompareAndSwapInt32(counter, 0, 1)
			Assert(t, ok)
			return context.WithValue(ctx, ctxConnectKey{}, "123")
		}),
		WithOnDisconnect(func(ctx context.Context, conn Connection) {
			defer wg.Done()
			t.Logf("OnDisconnect: %v", conn.RemoteAddr())
			counter, _ := ctx.Value(ctxPrepareKey{}).(*int32)
			// 1 -> 2 only succeeds if OnConnect has already finished.
			ok := atomic.CompareAndSwapInt32(counter, 1, 2)
			Assert(t, ok)
			// The value set by OnConnect must be visible here.
			v := ctx.Value(ctxConnectKey{}).(string)
			Equal(t, v, "123")
		}),
	)

	for i := int32(0); i < conns; i++ {
		conn, err := DialConnection(network, address, time.Second)
		MustNil(t, err)
		err = conn.Close()
		t.Logf("Close: %v", conn.LocalAddr())
		MustNil(t, err)
	}

	wg.Wait()
	err := loop.Shutdown(context.Background())
	MustNil(t, err)
}

// TestGracefulExit exercises EventLoop.Shutdown in three successive scenarios that
// reuse the same address: an idle connection, handlers that are still running
// (shutdown must time out first, then succeed), and connections with and
// without pending data.
func TestGracefulExit(t *testing.T) {
	network, address := "tcp", getTestAddress()

	// exit without processing connections
	eventLoop1 := newTestEventLoop(network, address,
		func(ctx context.Context, connection Connection) error {
			return nil
		})
	_, err := DialConnection(network, address, time.Second)
	MustNil(t, err)
	err = eventLoop1.Shutdown(context.Background())
	MustNil(t, err)

	// exit with processing connections
	// Handlers block on trigger until the test closes it.
	trigger := make(chan struct{})
	eventLoop2 := newTestEventLoop(network, address,
		func(ctx context.Context, conn Connection) error {
			<-trigger
			rd := conn.Reader()
			rd.Next(rd.Len()) // avoid dead loop
			return errors.New("done")
		})
	for i := 0; i < 10; i++ {
		// connect success
		conn, err := DialConnection(network, address, time.Second)
		MustNil(t, err)
		_, err = conn.Write(make([]byte, 16))
		MustNil(t, err)
	}
	// shutdown timeout
	ctx2, cancel2 := context.WithTimeout(context.Background(), time.Millisecond*100)
	defer cancel2()
	err = eventLoop2.Shutdown(ctx2)
	MustTrue(t, err != nil)
	Equal(t, err.Error(), ctx2.Err().Error())
	// shutdown success
	// NOTE(review): ctx2 has already expired at this point, yet the second
	// Shutdown is expected to return nil once the handlers are unblocked.
	close(trigger)
	err = eventLoop2.Shutdown(ctx2)
	MustTrue(t, err == nil)

	// exit with read connections
	size := 16
	eventLoop3 := newTestEventLoop(network, address,
		func(ctx context.Context, connection Connection) error {
			_, err := connection.Reader().Next(size)
			MustNil(t, err)
			return nil
		})
	for i := 0; i < 10; i++ {
		conn, err := DialConnection(network, address, time.Second)
		MustNil(t, err)
		// Only every other connection sends data; the rest stay idle.
		if i%2 == 0 {
			_, err := conn.Write(make([]byte, size))
			MustNil(t, err)
		}
	}
	ctx3, cancel3 := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel3()
	err = eventLoop3.Shutdown(ctx3)
	MustNil(t, err)
}

// TestCloseCallbackWhenOnRequest registers a close callback from inside the
// OnRequest handler and checks that it fires after the client closes.
func TestCloseCallbackWhenOnRequest(t *testing.T) {
	network, address := "tcp", getTestAddress()
	requested := make(chan struct{})
	closed := make(chan struct{})

	onRequest := func(ctx context.Context, connection Connection) error {
		// Drain whatever has arrived so the handler is not invoked again for it.
		_, err := connection.Reader().Next(connection.Reader().Len())
		MustNil(t, err)
		err = connection.AddCloseCallback(func(connection Connection) error {
			closed <- struct{}{}
			return nil
		})
		MustNil(t, err)
		requested <- struct{}{}
		return nil
	}
	loop := newTestEventLoop(network, address, onRequest)

	conn, err := DialConnection(network, address, time.Second)
	MustNil(t, err)
	_, err = conn.Writer().WriteString("hello")
	MustNil(t, err)
	MustNil(t, conn.Writer().Flush())

	// Close only after the handler has installed its callback.
	<-requested
	MustNil(t, conn.Close())
	<-closed

	MustNil(t, loop.Shutdown(context.Background()))
}

// TestCloseCallbackWhenOnConnect registers a close callback from inside the
// OnConnect callback and checks that both OnConnect and the close callback run
// for a connection the client closes right after dialing.
func TestCloseCallbackWhenOnConnect(t *testing.T) {
	network, address := "tcp", getTestAddress()
	connected := make(chan struct{})
	closed := make(chan struct{})

	onConnect := func(ctx context.Context, connection Connection) context.Context {
		cbErr := connection.AddCloseCallback(func(connection Connection) error {
			closed <- struct{}{}
			return nil
		})
		MustNil(t, cbErr)
		connected <- struct{}{}
		return ctx
	}
	// No OnRequest handler is installed for this test.
	loop := newTestEventLoop(network, address, nil, WithOnConnect(onConnect))

	conn, err := DialConnection(network, address, time.Second)
	MustNil(t, err)
	MustNil(t, conn.Close())

	<-connected
	<-closed

	MustNil(t, loop.Shutdown(context.Background()))
}

func TestCloseConnWhenOnConnect(t *testing.T) {
	network, address := "tcp", "localhost:8888"
	conns := 10
	var wg sync.WaitGroup
	wg.Add(conns)
	loop := newTestEventLoop(network, address,
		nil,
		WithOnConnect(func(ctx context.Context, connection Connection) context.Context {
			defer wg.Done()
			err := connection.Close()
			MustNil(t, err)
			return ctx
		}),
	)

	for i := 0; i < conns; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			conn, err := DialConnection(network, address, time.Second)
			if err != nil {
				return
			}
			_, err = conn.Reader().Next(1)
			Assert(t, errors.Is(err, ErrEOF))
			err = conn.Close()
			MustNil(t, err)
		}()
	}

	wg.Wait()
	err := loop.Shutdown(context.Background())
	MustNil(t, err)
}

// TestServerReadAndClose has the server close the connection after reading one
// message and checks that the client, once it notices the connection is no
// longer active, gets ErrConnClosed when it tries to send more data.
func TestServerReadAndClose(t *testing.T) {
	network, address := "tcp", getTestAddress()
	sendMsg := []byte("hello")
	loop := newTestEventLoop(network, address,
		func(ctx context.Context, connection Connection) error {
			_, err := connection.Reader().Next(len(sendMsg))
			MustNil(t, err)
			err = connection.Close()
			MustNil(t, err)
			return nil
		},
	)

	conn, err := DialConnection(network, address, time.Second)
	MustNil(t, err)
	_, err = conn.Writer().WriteBinary(sendMsg)
	MustNil(t, err)
	err = conn.Writer().Flush()
	MustNil(t, err)

	for conn.IsActive() {
		runtime.Gosched() // wait for poller close connection
	}
	// WriteBinary only stages data in the write buffer; per the Writer contract
	// the data is submitted on Flush, so the closed-connection error is
	// asserted on Flush rather than on WriteBinary.
	_, err = conn.Writer().WriteBinary(sendMsg)
	MustNil(t, err)
	err = conn.Writer().Flush()
	Assert(t, errors.Is(err, ErrConnClosed), err)

	err = loop.Shutdown(context.Background())
	MustNil(t, err)
}

// TestServerPanicAndClose makes the OnRequest handler panic and checks that the
// client connection subsequently becomes inactive and the loop still shuts down.
func TestServerPanicAndClose(t *testing.T) {
	// use custom RunTask to ignore panic log
	// The original runner.RunTask is restored when the test returns.
	runfunc := runner.RunTask
	defer func() { runner.RunTask = runfunc }()
	runner.RunTask = func(ctx context.Context, f func()) {
		go func() {
			defer func() { recover() }()
			f()
		}()
	}

	network, address := "tcp", getTestAddress()
	sendMsg := []byte("hello")
	// panicked is set to 1 by the handler right before it panics.
	var panicked int32
	loop := newTestEventLoop(network, address,
		func(ctx context.Context, connection Connection) error {
			_, err := connection.Reader().Next(len(sendMsg))
			MustNil(t, err)
			atomic.StoreInt32(&panicked, 1)
			panic("test")
		},
	)

	conn, err := DialConnection(network, address, time.Second)
	MustNil(t, err)
	_, err = conn.Writer().WriteBinary(sendMsg)
	MustNil(t, err)
	err = conn.Writer().Flush()
	MustNil(t, err)

	for atomic.LoadInt32(&panicked) == 0 {
		runtime.Gosched() // wait until the handler has reached its panic
	}
	for conn.IsActive() {
		runtime.Gosched() // wait for poller close connection
	}

	err = loop.Shutdown(context.Background())
	MustNil(t, err)
}

// TestClientWriteAndClose starts several clients that each write their packets
// and close immediately, then waits until the server has received every byte.
func TestClientWriteAndClose(t *testing.T) {
	network, address := "tcp", getTestAddress()
	connCount := 10
	packetSize, packetCount := 1000*5, 1
	var received int32

	onRequest := func(ctx context.Context, connection Connection) error {
		// Consume everything currently buffered and account for it.
		buf, err := connection.Reader().Next(connection.Reader().Len())
		if errors.Is(err, ErrConnClosed) {
			return err
		}
		MustNil(t, err)
		atomic.AddInt32(&received, int32(len(buf)))
		return nil
	}
	loop := newTestEventLoop(network, address, onRequest)

	var wg sync.WaitGroup
	wg.Add(connCount)
	for c := 0; c < connCount; c++ {
		go func() {
			defer wg.Done()
			conn, err := DialConnection(network, address, time.Second)
			MustNil(t, err)
			payload := make([]byte, packetSize)
			for sent := 0; sent < packetCount; sent++ {
				_, err = conn.Write(payload)
				MustNil(t, err)
			}
			MustNil(t, conn.Close())
		}()
	}
	wg.Wait()

	// All clients are done; spin until the server side has caught up.
	expected := int32(packetSize * packetCount * connCount)
	for atomic.LoadInt32(&received) != expected {
		t.Logf("left %d bytes not received", expected-atomic.LoadInt32(&received))
		runtime.Gosched()
	}
	MustNil(t, loop.Shutdown(context.Background()))
}

// TestServerAcceptWhenTooManyOpenFiles is a manual debugging aid: it lowers the
// process file-descriptor limit, exhausts the remaining descriptors, and then
// waits for a connection to be accepted once the descriptors are released.
// It is skipped unless the N_LOCAL environment variable is set.
func TestServerAcceptWhenTooManyOpenFiles(t *testing.T) {
	if os.Getenv("N_LOCAL") == "" {
		t.Skip("Only test for debug purpose")
		return
	}

	var originalRlimit syscall.Rlimit
	err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &originalRlimit)
	MustNil(t, err)
	t.Logf("Original RLimit: %v", originalRlimit)

	// Shrink the soft limit to 32 descriptors; the hard limit is left unchanged.
	rlimit := syscall.Rlimit{Cur: 32, Max: originalRlimit.Max}
	err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rlimit)
	MustNil(t, err)
	err = syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rlimit)
	MustNil(t, err)
	t.Logf("New RLimit: %v", rlimit)
	defer func() { // reset
		err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &originalRlimit)
		MustNil(t, err)
	}()

	network, address := "tcp", getTestAddress()
	// connected counts connections seen by OnConnect.
	var connected int32
	loop := newTestEventLoop(network, address,
		// Echo handler: write back whatever was read.
		func(ctx context.Context, connection Connection) error {
			buf, err := connection.Reader().Next(connection.Reader().Len())
			connection.Writer().WriteBinary(buf)
			connection.Writer().Flush()
			return err
		},
		WithOnConnect(func(ctx context.Context, connection Connection) context.Context {
			atomic.AddInt32(&connected, 1)
			t.Logf("Conn[%s] accepted", connection.RemoteAddr())
			return ctx
		}),
		WithOnDisconnect(func(ctx context.Context, connection Connection) {
			t.Logf("Conn[%s] disconnected", connection.RemoteAddr())
		}),
	)
	time.Sleep(time.Millisecond * 10)

	// out of fds
	// Keep opening /dev/null until the open fails with an out-of-fd error.
	files := make([]*os.File, 0)
	for {
		f, err := os.Open("/dev/null")
		if err != nil {
			Assert(t, isOutOfFdErr(errors.Unwrap(err)), err)
			break
		}
		files = append(files, f)
	}
	// Release the descriptors after ten seconds.
	go func() {
		time.Sleep(time.Second * 10)
		t.Logf("close all files")
		for _, f := range files {
			f.Close()
		}
	}()

	// we should use telnet manually
	// The loop below blocks until at least one connection has been accepted.
	connections := 1
	for atomic.LoadInt32(&connected) < int32(connections) {
		t.Logf("connected=%d", atomic.LoadInt32(&connected))
		time.Sleep(time.Second)
	}
	time.Sleep(time.Second * 10)

	err = loop.Shutdown(context.Background())
	MustNil(t, err)
}

// createTestListener keeps calling CreateListener, sleeping 100ms between
// attempts, until it succeeds. It therefore never returns a non-nil error and
// blocks for as long as the address cannot be bound.
func createTestListener(network, address string) (Listener, error) {
	ln, err := CreateListener(network, address)
	for err != nil {
		time.Sleep(time.Millisecond * 100)
		ln, err = CreateListener(network, address)
	}
	return ln, nil
}

// newTestEventLoop creates a listener on the given address, builds an EventLoop
// with the supplied handler and options, and starts serving in a background
// goroutine. It panics if either the listener or the loop cannot be created;
// the error returned by Serve is not observed.
func newTestEventLoop(network, address string, onRequest OnRequest, opts ...Option) EventLoop {
	listener, lnErr := createTestListener(network, address)
	if lnErr != nil {
		panic(lnErr)
	}
	loop, loopErr := NewEventLoop(onRequest, opts...)
	if loopErr != nil {
		panic(loopErr)
	}
	go loop.Serve(listener)
	return loop
}


================================================
FILE: netpoll_windows.go
================================================
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build windows
// +build windows

// The following methods would not be used, but are intended to compile on Windows.
package netpoll

import (
	"net"
)

// Configure is the Windows stand-in for configuring netpoll's internal
// behaviors. It ignores config and always returns nil; it exists so that the
// package compiles on Windows.
func Configure(config Config) (err error) {
	return nil
}

// NewDialer is the Windows stand-in for the dialer constructor.
// It always returns a nil Dialer; it exists so that the package compiles on Windows.
func NewDialer() Dialer {
	return nil
}

// NewEventLoop is the Windows stand-in for the event loop constructor.
// It ignores its arguments and always returns a nil EventLoop and a nil error;
// it exists so that the package compiles on Windows.
func NewEventLoop(onRequest OnRequest, ops ...Option) (EventLoop, error) {
	return nil, nil
}

// ConvertListener is the Windows stand-in for converting a net.Listener into a
// Listener. It ignores l and always returns a nil Listener and a nil error.
func ConvertListener(l net.Listener) (nl Listener, err error) {
	return nil, nil
}

// CreateListener is the Windows stand-in for creating a new Listener.
// It ignores network and addr and always returns a nil Listener and a nil error.
func CreateListener(network, addr string) (l Listener, err error) {
	return nil, nil
}


================================================
FILE: nocopy.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"io"
	"reflect"
	"unsafe"

	"github.com/bytedance/gopkg/lang/dirtmake"
	"github.com/bytedance/gopkg/lang/mcache"
)

// Reader is a collection of operations for nocopy reads.
//
// For ease of use, it is recommended to implement Reader as a blocking interface,
// rather than simply fetching the buffer.
// For example, a call to Next(n) should block while fewer than n bytes are available, unless it times out.
// The return value is guaranteed to meet the requirements or an error will be returned.
type Reader interface {
	// Next returns a slice containing the next n bytes from the buffer,
	// advancing the buffer as if the bytes had been returned by Read.
	//
	// If there are fewer than n bytes in the buffer, Next blocks
	// until enough data is available or an error occurs (such as a wait timeout).
	//
	// The slice p is only valid until the next call to the Release method.
	// Next is not globally optimal, and Skip, ReadString, ReadBinary methods
	// are recommended for specific scenarios.
	//
	// Return: len(p) must be n or 0, and p and error cannot be nil at the same time.
	Next(n int) (p []byte, err error)

	// Peek returns the next n bytes without advancing the reader.
	// Other behavior is the same as Next.
	Peek(n int) (buf []byte, err error)

	// Skip the next n bytes and advance the reader, which is
	// a faster implementation of Next when the next data is not used.
	Skip(n int) (err error)

	// Until reads until the first occurrence of delim in the input,
	// returning a slice that ends with delim.
	// If Until encounters an error before finding a delimiter,
	// it returns all the data in the buffer and the error itself (often ErrEOF or ErrConnClosed).
	// Until returns err != nil only if line does not end in delim.
	Until(delim byte) (line []byte, err error)

	// ReadString is a faster implementation of Next when a string needs to be returned.
	// It replaces:
	//
	//  var p, err = Next(n)
	//  return string(p), err
	//
	ReadString(n int) (s string, err error)

	// ReadBinary is a faster implementation of Next when it needs to
	// return a copy of the slice that is not shared with the underlying layer.
	// It replaces:
	//
	//  var p, err = Next(n)
	//  var b = make([]byte, n)
	//  copy(b, p)
	//  return b, err
	//
	ReadBinary(n int) (p []byte, err error)

	// ReadByte is a faster implementation of Next when a byte needs to be returned.
	// It replaces:
	//
	//  var p, err = Next(1)
	//  return p[0], err
	//
	ReadByte() (b byte, err error)

	// Slice returns a new Reader containing the next n bytes from this Reader.
	//
	// If you want to make a new Reader using the []byte returned by Next, Slice already does that,
	// and the operation is zero-copy. Besides, Slice also releases this Reader.
	// The logic is similar to the following pseudocode:
	//
	//  var p, err = this.Next(n)
	//  var reader = new Reader(p) // pseudocode
	//  this.Release()
	//  return reader, err
	//
	Slice(n int) (r Reader, err error)

	// Release frees the memory occupied by all slices that have been read. It must be called explicitly to
	// recycle the memory once the previously read data is confirmed to be no longer in use.
	// After Release is invoked, slices obtained from methods such as Next and Peek
	// point to invalid memory and must not be used anymore.
	Release() (err error)

	// Len returns the total length of the readable data in the reader.
	Len() (length int)
}

// Writer is a collection of operations for nocopy writes.
//
// The design is a two-step operation: first request a section of memory,
// then fill it and submit. E.g:
//
//	var buf, _ = Malloc(n)
//	buf = append(buf[:0], ...)
//	Flush()
//
// Note that it is not recommended to submit self-managed buffers to Writer.
// Since the writer is processed asynchronously, if a self-managed buffer is used and recycled after submission,
// it may cause inconsistent life cycle problems. Of course this is not within the scope of the design.
type Writer interface {
	// Malloc returns a slice containing the next n bytes from the buffer,
	// which will be written after submission (e.g. Flush).
	//
	// The slice p is only valid until the next submit (e.g. Flush).
	// Therefore, please make sure that all data has been written into the slice before submission.
	Malloc(n int) (buf []byte, err error)

	// WriteString is a faster implementation of Malloc when a string needs to be written.
	// It replaces:
	//
	//  var buf, err = Malloc(len(s))
	//  n = copy(buf, s)
	//  return n, err
	//
	// The argument string s will be referenced based on the original address and will not be copied,
	// so make sure that the string s will not be changed.
	WriteString(s string) (n int, err error)

	// WriteBinary is a faster implementation of Malloc when a slice needs to be written.
	// It replaces:
	//
	//  var buf, err = Malloc(len(b))
	//  n = copy(buf, b)
	//  return n, err
	//
	// The argument slice b will be referenced based on the original address and will not be copied,
	// so make sure that the slice b will not be changed.
	WriteBinary(b []byte) (n int, err error)

	// WriteByte is a faster implementation of Malloc when a byte needs to be written.
	// It replaces:
	//
	//  var buf, _ = Malloc(1)
	//  buf[0] = b
	//
	WriteByte(b byte) (err error)

	// WriteDirect is used to insert an additional slice of data on the current write stream.
	// For example, if you plan to execute:
	//
	//  var bufA, _ = Malloc(nA)
	//  WriteBinary(b)
	//  var bufB, _ = Malloc(nB)
	//
	// It can be replaced by:
	//
	//  var buf, _ = Malloc(nA+nB)
	//  WriteDirect(b, nB)
	//
	// where buf[:nA] = bufA, buf[nA:nA+nB] = bufB.
	WriteDirect(p []byte, remainCap int) error

	// MallocAck will keep the first n malloc bytes and discard the rest.
	// The following behavior:
	//
	//  var buf, _ = Malloc(8)
	//  buf = buf[:5]
	//  MallocAck(5)
	//
	// is equivalent to:
	//
	//  var buf, _ = Malloc(5)
	//
	MallocAck(n int) (err error)

	// Append the argument writer to the tail of this writer and set the argument writer to nil,
	// the operation is zero-copy, similar to p = append(p, w.p).
	Append(w Writer) (err error)

	// Flush submits all malloc'ed data; the caller must make sure that the allocated bytes have been filled in correctly.
	// Its behavior is equivalent to an io.Writer Write call that already has its parameter (slice b).
	Flush() (err error)

	// MallocLen returns the total length of the writable data that has not yet been submitted in the writer.
	MallocLen() (length int)
}

// ReadWriter combines the nocopy Reader and Writer interfaces by embedding both.
type ReadWriter interface {
	Reader
	Writer
}

// NewReader wraps an io.Reader in a nocopy Reader by delegating to newZCReader.
func NewReader(r io.Reader) Reader {
	zr := newZCReader(r)
	return zr
}

// NewWriter wraps an io.Writer in a nocopy Writer by delegating to newZCWriter.
func NewWriter(w io.Writer) Writer {
	zw := newZCWriter(w)
	return zw
}

// NewReadWriter wraps an io.ReadWriter in a nocopy ReadWriter. The same rw
// backs both halves: a zero-copy reader and a zero-copy writer.
func NewReadWriter(rw io.ReadWriter) ReadWriter {
	rd := newZCReader(rw)
	wr := newZCWriter(rw)
	return &zcReadWriter{zcReader: rd, zcWriter: wr}
}

// NewIOReader adapts a nocopy Reader to io.Reader. If r already implements
// io.Reader it is returned as is; otherwise it is wrapped via newIOReader.
func NewIOReader(r Reader) io.Reader {
	native, ok := r.(io.Reader)
	if !ok {
		return newIOReader(r)
	}
	return native
}

// NewIOWriter adapts a nocopy Writer to io.Writer. If w already implements
// io.Writer it is returned as is; otherwise it is wrapped via newIOWriter.
func NewIOWriter(w Writer) io.Writer {
	native, ok := w.(io.Writer)
	if !ok {
		return newIOWriter(w)
	}
	return native
}

// NewIOReadWriter adapts a nocopy ReadWriter to io.ReadWriter. If rw already
// implements io.ReadWriter it is returned as is; otherwise the two halves are
// adapted separately with NewIOReader and NewIOWriter.
func NewIOReadWriter(rw ReadWriter) io.ReadWriter {
	native, ok := rw.(io.ReadWriter)
	if ok {
		return native
	}
	rd := NewIOReader(rw)
	wr := NewIOWriter(rw)
	return &ioReadWriter{Reader: rd, Writer: wr}
}

// Size constants shared by the buffer implementation, plus the per-node flags.
const (
	block1k  = 1 << 10
	block2k  = 2 << 10
	block4k  = 4 << 10
	block8k  = 8 << 10
	block32k = 32 << 10

	pagesize  = block8k
	mallocMax = block8k * block1k // 8MB: upper bound on the capacity served by malloc via mcache

	defaultLinkBufferMode = 0
	// flagUnmanaged is set on a buffer node whose memory was not allocated by the
	// LinkBuffer itself (e.g. user-provided data via WriteDirect, or a zero-size node).
	// Such nodes are not reusable and are skipped during buffer growth.
	flagUnmanaged uint8 = 1 << 0 // 0000 0001
	// flagReadExposed is set on a buffer node whose underlying memory has been handed
	// directly to user code by a zero-copy Reader method (Next, Peek, Slice, GetBytes).
	// User code may keep referencing that memory until Release is called.
	flagReadExposed uint8 = 1 << 1 // 0000 0010
)

// unsafeSliceToString reinterprets b as a string without copying. The result
// shares b's backing memory, so b must not be modified while the string is in use.
func unsafeSliceToString(b []byte) string {
	view := (*string)(unsafe.Pointer(&b))
	return *view
}

// unsafeStringToSlice reinterprets s as a byte slice without copying. The
// returned slice has len and cap equal to len(s) and aliases the string's
// memory, so it must be treated as read-only.
func unsafeStringToSlice(s string) (b []byte) {
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	dst.Data = src.Data
	dst.Len = len(s)
	dst.Cap = len(s)
	return b
}

// malloc returns a byte slice of the given size and capacity. Requests with a
// capacity of at most mallocMax are served by mcache; larger ones are
// allocated with dirtmake.Bytes.
func malloc(size, capacity int) []byte {
	if capacity <= mallocMax {
		return mcache.Malloc(size, capacity)
	}
	return dirtmake.Bytes(size, capacity)
}

// free hands buf back to mcache, unless its capacity exceeds mallocMax, in
// which case it is left alone (malloc does not take such buffers from mcache).
func free(buf []byte) {
	if cap(buf) <= mallocMax {
		mcache.Free(buf)
	}
}


================================================
FILE: nocopy_linkbuffer.go
================================================
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"bytes"
	"errors"
	"fmt"
	"sync"
	"sync/atomic"

	"github.com/bytedance/gopkg/lang/dirtmake"
)

// BinaryInplaceThreshold marks the minimum value of the nocopy slice length,
// which is the threshold to use copy to minimize overhead.
const BinaryInplaceThreshold = block4k

// LinkBufferCap that can be modified marks the minimum value of each node of LinkBuffer.
var LinkBufferCap = block4k

// untilErr is returned by Until when the delimiter cannot be found in the buffer.
var untilErr = errors.New("link buffer read slice cannot find delim")

// Compile-time checks that *LinkBuffer satisfies both Reader and Writer.
var (
	_ Reader = &LinkBuffer{}
	_ Writer = &LinkBuffer{}
)

// NewLinkBuffer returns a LinkBuffer with no readable data. The optional first
// element of size sets the capacity of the initial node (0 when omitted); any
// further elements are ignored. All four cursors start on that single node.
func NewLinkBuffer(size ...int) *LinkBuffer {
	capacity := 0
	if len(size) > 0 {
		capacity = size[0]
	}
	first := newLinkBufferNode(capacity)

	lb := &LinkBuffer{}
	lb.head, lb.read, lb.flush, lb.write = first, first, first, first
	return lb
}

// UnsafeLinkBuffer implements ReadWriter.
// It is a linked list of buffer nodes with separate cursors for releasing,
// reading and writing.
type UnsafeLinkBuffer struct {
	// length is the number of readable bytes; Len loads it atomically.
	length int64
	// NOTE(review): presumably the number of malloc'ed bytes not yet flushed — confirm against the writer methods.
	mallocSize int

	head  *linkBufferNode // release head
	read  *linkBufferNode // read head
	flush *linkBufferNode // malloc head
	write *linkBufferNode // malloc tail

	// buf allocated by Next when the requested bytes span multiple nodes, which should be freed when release
	caches [][]byte

	// for `Peek` only, avoid creating too many []byte in `caches`
	// fix the issue when we have a large buffer and we call `Peek` multiple times
	cachePeek []byte
}

// Len implements Reader. It atomically loads the readable length.
func (b *UnsafeLinkBuffer) Len() int {
	return int(atomic.LoadInt64(&b.length))
}

// IsEmpty reports whether this LinkBuffer currently has no readable data.
func (b *UnsafeLinkBuffer) IsEmpty() (ok bool) {
	if b.Len() != 0 {
		return false
	}
	return true
}

// ------------------------------------------ implement copy reader ------------------------------------------

// readCopy copies up to len(p) bytes from the buffer into p without exposing
// the underlying buffer to user code (flagReadExposed is not set).
// After copying, it releases consumed nodes where readExposed is false.
// Nodes with readExposed are left for the next Release call.
//
// It returns the number of bytes copied, which is min(len(p), b.Len()).
func (b *UnsafeLinkBuffer) readCopy(p []byte) (n int) {
	l := len(p)
	if l == 0 || b.Len() == 0 {
		return 0
	}
	// clamp the request to what is actually readable
	if has := b.Len(); has < l {
		l = has
	}
	b.recalLen(-l)

	// copy from nodes
	// ack is the number of bytes still to be copied.
	for ack := l; ack > 0; {
		if b.read.Len() == 0 {
			b.read = b.read.next
			continue
		}
		rd := b.read.Len()
		if rd >= ack {
			// the current node covers the rest: copy and advance its read offset
			n += copy(p[n:], b.read.buf[b.read.off:b.read.off+ack])
			b.read.off += ack
			break
		}
		// take everything left in this node and move on to the next one
		n += copy(p[n:], b.read.buf[b.read.off:])
		ack -= rd
		b.read = b.read.next
	}

	// advance read past empty nodes
	for b.read != b.flush && b.read.Len() == 0 {
		b.read = b.read.next
	}
	// release consumed nodes that are not readExposed.
	// exposed nodes stay in the chain so Release() can free them later.
	//
	// Example: [exposed/consumed] → [not-exposed/consumed] → [read/partial]
	// After:   head → [exposed] → [read/partial]
	//          the middle node is detached and released.
	//
	// prev is the last exposed node kept so far; newHead becomes the first
	// exposed node, or b.read if every consumed node was released.
	var prev *linkBufferNode
	newHead := b.read
	for cur := b.head; cur != b.read; {
		// remember the successor before cur is possibly released
		next := cur.next
		if cur.readExposed() {
			if prev == nil {
				newHead = cur
			}
			prev = cur
		} else {
			cur.Release()
			// unlink cur by pointing the last kept node at its successor
			if prev != nil {
				prev.next = next
			}
		}
		cur = next
	}
	b.head = newHead
	return n
}

// ------------------------------------------ implement zero-copy reader ------------------------------------------

// Next implements Reader.
//
// For n <= 0 it returns (nil, nil). If fewer than n bytes are readable it
// returns an error and consumes nothing. When the n bytes sit in a single node
// the node's memory is returned directly and the node is flagged as
// read-exposed. Otherwise the bytes are copied out of several nodes into a
// fresh buffer; buffers obtained from malloc are recorded in b.caches so that
// Release can free them.
func (b *UnsafeLinkBuffer) Next(n int) (p []byte, err error) {
	if n <= 0 {
		return nil, nil
	}
	if b.Len() < n {
		return nil, fmt.Errorf("link buffer next[%d] not enough", n)
	}
	b.recalLen(-n)

	// fast path: everything lives in the current node
	if b.isSingleNode(n) {
		b.read.setFlag(flagReadExposed)
		return b.read.Next(n), nil
	}

	// slow path: gather the bytes from consecutive nodes
	if n > block1k && n <= mallocMax {
		p = malloc(n, n)
		b.caches = append(b.caches, p)
	} else {
		p = dirtmake.Bytes(n, n)
	}
	copied := 0
	for remaining := n; remaining > 0; {
		avail := b.read.Len()
		if avail >= remaining {
			// the current node holds the rest; stay on it
			copy(p[copied:], b.read.Next(remaining))
			break
		}
		if avail > 0 {
			copied += copy(p[copied:], b.read.Next(avail))
		}
		remaining -= avail
		b.read = b.read.next
	}
	return p, nil
}

// Peek implements Reader. It returns the next n bytes without consuming them.
//
// When the n bytes sit in a single node, the node's memory is returned directly
// and the node is flagged as read-exposed. Otherwise the bytes are assembled in
// b.cachePeek, which is allocated through malloc, reused and extended across
// successive Peek calls, and freed by Release.
func (b *UnsafeLinkBuffer) Peek(n int) (p []byte, err error) {
	if n <= 0 {
		return
	}
	// check whether enough or not.
	if b.Len() < n {
		return p, fmt.Errorf("link buffer peek[%d] not enough", n)
	}
	// single node
	if b.isSingleNode(n) {
		b.read.setFlag(flagReadExposed)
		return b.read.Peek(n), nil
	}

	// multiple nodes

	// try to make use of the cap of b.cachePeek, if can't, free it.
	if b.cachePeek != nil && cap(b.cachePeek) < n {
		free(b.cachePeek)
		b.cachePeek = nil
	}
	if b.cachePeek == nil {
		b.cachePeek = malloc(0, n) // init with zero len, will append later
	}
	p = b.cachePeek
	if len(p) >= n {
		// in case we peek smaller than last time,
		// we can return cache data directly.
		// NOTE(review): this relies on cachePeek being reset whenever data is
		// consumed (e.g. by Next or Skip); the reset is not visible in this
		// function — confirm where it happens.
		return p[:n], nil
	}

	// How it works >>>>>>
	// [ -------- node0 -------- ][ --------- node1 --------- ]  <- b.read
	// [ --------------- p --------------- ]
	//                                     ^ len(p)     ^ n here
	//                           ^ scanned
	// `scanned` var is the len of last nodes which we scanned and already copied to p
	// `len(p) - scanned` is the start pos of current node for p to copy from
	// `n - len(p)` is the len of bytes we're going to append to p
	// 		we copy `len(node1)` - `len(p) - scanned` bytes in case node1 doesn't have enough data
	for scanned, node := 0, b.read; len(p) < n; node = node.next {
		l := node.Len()
		if scanned+l <= len(p) { // already copied in p, skip
			scanned += l
			continue
		}
		start := len(p) - scanned // `start` must be smaller than l coz `scanned+l <= len(p)` is false
		copyn := n - len(p)
		// never take more than what is left in this node
		if nodeLeftN := l - start; copyn > nodeLeftN {
			copyn = nodeLeftN
		}
		p = append(p, node.Peek(l)[start:start+copyn]...)
		scanned += l
	}
	// keep the extended slice so a later Peek can continue from it
	b.cachePeek = p
	return p[:n], nil
}

// Skip implements Reader. It discards the next n readable bytes.
//
// For n <= 0 it is a no-op. If fewer than n bytes are readable it returns an
// error and consumes nothing.
func (b *UnsafeLinkBuffer) Skip(n int) (err error) {
	if n <= 0 {
		return nil
	}
	if b.Len() < n {
		return fmt.Errorf("link buffer skip[%d] not enough", n)
	}
	b.recalLen(-n)

	remaining := n
	for remaining > 0 {
		avail := b.read.Len()
		if avail >= remaining {
			// the skip ends inside this node: just move its read offset
			b.read.off += remaining
			break
		}
		// the whole node is skipped; continue with the next one
		remaining -= avail
		b.read = b.read.next
	}
	return nil
}

// Release frees every node that has been fully read, along with the temporary
// buffers handed out by cross-node Next calls (b.caches) and the Peek cache.
// It always returns nil.
//
// Note carried over from the original: b.flush == nil indicates that this
// LinkBuffer is created by LinkBuffer.Slice.
func (b *UnsafeLinkBuffer) Release() (err error) {
	// move the read cursor past drained nodes, but never beyond flush
	for b.read != b.flush && b.read.Len() == 0 {
		b.read = b.read.next
	}
	// release everything in front of the read cursor
	for node := b.head; node != b.read; node = b.head {
		b.head = node.next
		node.Release()
	}
	// return the cross-node buffers and keep the slice for reuse
	for i, cached := range b.caches {
		free(cached)
		b.caches[i] = nil
	}
	b.caches = b.caches[:0]
	if b.cachePeek != nil {
		free(b.cachePeek)
		b.cachePeek = nil
	}
	return nil
}

// ReadString implements Reader. It consumes n bytes and returns them as a
// string backed by the copy that readBinary produces.
//
// For n <= 0 it returns ("", nil); if fewer than n bytes are readable it
// returns an error and consumes nothing.
func (b *UnsafeLinkBuffer) ReadString(n int) (s string, err error) {
	switch {
	case n <= 0:
		return "", nil
	case b.Len() < n:
		return "", fmt.Errorf("link buffer read string[%d] not enough", n)
	}
	return unsafeSliceToString(b.readBinary(n)), nil
}

// ReadBinary implements Reader.
//
// It consumes n bytes and returns them in a newly allocated slice (see readBinary).
// A non-positive n returns a nil slice and no error.
func (b *UnsafeLinkBuffer) ReadBinary(n int) (p []byte, err error) {
	if n <= 0 {
		return nil, nil
	}
	// the request must fit into the readable data
	if b.Len() < n {
		return nil, fmt.Errorf("link buffer read binary[%d] not enough", n)
	}
	p = b.readBinary(n)
	return p, nil
}

// readBinary copies the next n readable bytes into a fresh slice and consumes them.
// It cannot use mcache, because the memory allocated by readBinary will not be recycled.
// Callers must have checked that at least n bytes are readable.
func (b *UnsafeLinkBuffer) readBinary(n int) (p []byte) {
	b.recalLen(-n) // re-cal length

	// isSingleNode also moves b.read onto the first non-empty node
	single := b.isSingleNode(n)
	p = dirtmake.Bytes(n, n)
	if single {
		copy(p, b.read.Next(n))
		return p
	}

	// the data spans several nodes: drain them one after another into p
	filled := 0
	for remaining := n; remaining > 0; {
		avail := b.read.Len()
		if avail >= remaining {
			copy(p[filled:], b.read.Next(remaining))
			break
		}
		if avail > 0 {
			filled += copy(p[filled:], b.read.Next(avail))
		}
		b.read = b.read.next
		remaining -= avail
	}
	return p
}

// ReadByte implements Reader.
//
// It consumes and returns a single byte, or an error when the buffer is empty.
func (b *UnsafeLinkBuffer) ReadByte() (p byte, err error) {
	if b.Len() < 1 {
		return 0, errors.New("link buffer read byte is empty")
	}
	b.recalLen(-1) // re-cal length
	// move past drained nodes until one with data is found
	for b.read.Len() < 1 {
		b.read = b.read.next
	}
	return b.read.Next(1)[0], nil
}

// Until returns a slice ends with the delim in the buffer.
// The returned bytes are consumed via Next; untilErr is returned when delim is
// not found in the readable data.
func (b *UnsafeLinkBuffer) Until(delim byte) (line []byte, err error) {
	if idx := b.indexByte(delim, 0); idx >= 0 {
		// include the delimiter itself in the returned line
		return b.Next(idx + 1)
	}
	return nil, untilErr
}

// Slice returns a new LinkBuffer, which is a zero-copy slice of this LinkBuffer,
// and only holds the ability of Reader.
//
// The n sliced bytes are consumed from b. Every node whose bytes are handed out is
// flagged flagReadExposed, and the returned buffer references the data through
// nodes created by Refer instead of copying it.
//
// Slice executes a Release on b when the slice spans multiple nodes; the
// single-node path returns without releasing.
//
// A non-positive n yields an empty buffer; asking for more than b.Len() bytes
// returns an error.
func (b *UnsafeLinkBuffer) Slice(n int) (r Reader, err error) {
	if n <= 0 {
		return NewLinkBuffer(0), nil
	}
	// check whether enough or not.
	if b.Len() < n {
		return r, fmt.Errorf("link buffer readv[%d] not enough", n)
	}
	b.recalLen(-n) // re-cal length

	// just use for range
	p := new(LinkBuffer)
	p.length = int64(n)

	// Runs on every return path below, after p's node chain has been built.
	defer func() {
		// set to read-only: flush and write are moved to the node after the last
		// referenced one (presumably nil, since Refer nodes are created unlinked).
		p.flush = p.flush.next
		p.write = p.flush
	}()

	// single node
	if b.isSingleNode(n) {
		b.read.setFlag(flagReadExposed)
		node := b.read.Refer(n)
		p.head, p.read, p.flush = node, node, node
		return p, nil
	}
	// multiple nodes
	// take everything left in the current node first
	l := b.read.Len()
	b.read.setFlag(flagReadExposed)
	node := b.read.Refer(l)
	b.read = b.read.next

	p.head, p.read, p.flush = node, node, node
	// p.flush is used as the tail pointer while the chain is being appended to
	for ack := n - l; ack > 0; ack = ack - l {
		l = b.read.Len()
		if l >= ack {
			// last node: reference only the bytes still needed
			b.read.setFlag(flagReadExposed)
			p.flush.next = b.read.Refer(ack)
			p.flush = p.flush.next
			break
		} else if l > 0 {
			// intermediate node: reference all of its remaining bytes
			b.read.setFlag(flagReadExposed)
			p.flush.next = b.read.Refer(l)
			p.flush = p.flush.next
		}
		b.read = b.read.next
	}
	return p, b.Release()
}

// ------------------------------------------ implement zero-copy writer ------------------------------------------

// Malloc reserves n writable bytes. The reserved memory is not readable until it
// has been submitted (e.g. by Flush). A non-positive n returns a nil slice.
func (b *UnsafeLinkBuffer) Malloc(n int) (buf []byte, err error) {
	if n <= 0 {
		return nil, nil
	}
	b.mallocSize += n
	// make sure b.write points at a node with room for n bytes
	b.growth(n)
	buf = b.write.Malloc(n)
	return buf, nil
}

// MallocLen implements Writer.
// It reports the number of bytes reserved but not yet submitted (b.mallocSize).
func (b *UnsafeLinkBuffer) MallocLen() (length int) {
	length = b.mallocSize
	return length
}

// MallocAck will keep the first n malloc bytes and discard the rest.
// A negative n is rejected with an error.
func (b *UnsafeLinkBuffer) MallocAck(n int) (err error) {
	if n < 0 {
		return fmt.Errorf("link buffer malloc ack[%d] invalid", n)
	}
	b.mallocSize = n
	// restart from the first node that may hold unsubmitted bytes
	b.write = b.flush

	for remaining := n; remaining > 0; {
		pending := b.write.malloc - len(b.write.buf) // unsubmitted bytes in this node
		if pending >= remaining {
			// the kept region ends inside this node: trim its malloc mark
			b.write.malloc = remaining + len(b.write.buf)
			break
		}
		b.write = b.write.next
		remaining -= pending
	}
	// discard the rest: roll every following node back to its read offset
	for node := b.write.next; node != nil; node = node.next {
		node.malloc = node.off
		node.refer = 1
		node.buf = node.buf[:node.off]
	}
	return nil
}

// Flush will submit all malloc data and must confirm that the allocated bytes have been correctly assigned.
// After Flush the submitted bytes are counted in the readable length and
// b.flush points at b.write.
func (b *UnsafeLinkBuffer) Flush() (err error) {
	b.mallocSize = 0
	// FIXME: The tail node must not be larger than 8KB to prevent Out Of Memory.
	if cap(b.write.buf) > pagesize {
		tail := newLinkBufferNode(0)
		b.write.next = tail
		b.write = tail
	}
	// expose the malloc'ed region of every node from flush up to and including write
	submitted := 0
	for node := b.flush; node != b.write.next; node = node.next {
		if pending := node.malloc - len(node.buf); pending > 0 {
			submitted += pending
			node.buf = node.buf[:node.malloc]
		}
	}
	b.flush = b.write
	// re-cal length
	b.recalLen(submitted)
	return nil
}

// Append implements Writer.
// Only a *LinkBuffer is accepted; it is handed over to WriteBuffer.
func (b *UnsafeLinkBuffer) Append(w Writer) (err error) {
	if lb, isLinkBuffer := w.(*LinkBuffer); isLinkBuffer {
		return b.WriteBuffer(lb)
	}
	return errors.New("unsupported writer which is not LinkBuffer")
}

// WriteBuffer moves the node chain of buf (from buf.read through buf.write) to the
// tail of b without copying, then empties buf.
//
// WriteBuffer will not submit(e.g. Flush) data to ensure normal use of MallocLen.
// you must actively submit before read the data.
// The argument buf can't be used after calling WriteBuffer. (set it to nil)
//
// A nil buf, or a buf with neither readable nor malloc'ed bytes, is a no-op.
func (b *UnsafeLinkBuffer) WriteBuffer(buf *LinkBuffer) (err error) {
	if buf == nil {
		return
	}
	// capture both counters before buf is torn down below
	bufLen, bufMallocLen := buf.Len(), buf.MallocLen()
	if bufLen+bufMallocLen <= 0 {
		return nil
	}
	// splice buf's chain behind the current tail and adopt its write node
	b.write.next = buf.read
	b.write = buf.write

	// close buf, prevents reuse.
	// nodes in front of buf.read were already consumed by buf
	for buf.head != buf.read {
		nd := buf.head
		buf.head = buf.head.next
		nd.Release()
	}
	// nodes behind buf.write are not taken over by b
	for buf.write = buf.write.next; buf.write != nil; {
		nd := buf.write
		buf.write = buf.write.next
		nd.Release()
	}
	buf.length, buf.mallocSize, buf.head, buf.read, buf.flush, buf.write = 0, 0, nil, nil, nil, nil

	// DON'T MODIFY THE CODE BELOW UNLESS YOU KNOW WHAT YOU ARE DOING !
	//
	// You may encounter a chain of bugs and not be able to
	// find out within a week that they are caused by modifications here.
	//
	// After release buf, continue to adjust b.
	b.write.next = nil
	if bufLen > 0 {
		b.recalLen(bufLen)
	}
	b.mallocSize += bufMallocLen
	return nil
}

// WriteString implements Writer.
// The string is converted with unsafeStringToSlice and passed to WriteBinary;
// an empty string writes nothing.
func (b *UnsafeLinkBuffer) WriteString(s string) (n int, err error) {
	if s == "" {
		return 0, nil
	}
	return b.WriteBinary(unsafeStringToSlice(s))
}

// WriteBinary implements Writer.
//
// Slices longer than BinaryInplaceThreshold are linked into the buffer without
// copying; shorter ones are copied into malloc'ed space. The data is not
// readable until it has been submitted (e.g. by Flush).
func (b *UnsafeLinkBuffer) WriteBinary(p []byte) (n int, err error) {
	n = len(p)
	if n == 0 {
		return 0, nil
	}
	b.mallocSize += n

	if n <= BinaryInplaceThreshold {
		// here will copy
		b.growth(n)
		return copy(b.write.Malloc(n), p), nil
	}
	// TODO: Verify that all nocopy is possible under mcache.
	// expand buffer directly with nocopy: a size-0 node wraps p itself
	wrapper := newLinkBufferNode(0)
	wrapper.buf, wrapper.malloc = p[:0], n
	b.write.next = wrapper
	b.write = wrapper
	return n, nil
}

// WriteDirect links extra into the malloc'ed (not yet flushed) region without copying.
// extra is inserted after the first b.mallocSize-remainLen malloc'ed bytes, i.e.
// remainLen malloc'ed bytes stay behind it.
//
// WriteDirect cannot be mixed with WriteString or WriteBinary functions.
//
// An empty extra or a negative remainLen is a no-op that returns nil.
func (b *UnsafeLinkBuffer) WriteDirect(extra []byte, remainLen int) error {
	n := len(extra)
	if n == 0 || remainLen < 0 {
		return nil
	}
	// find origin
	// walk from flush, consuming each node's unsubmitted bytes, until the node
	// containing the insertion point is reached
	origin := b.flush
	malloc := b.mallocSize - remainLen // calculate the remaining malloc length
	for t := origin.malloc - len(origin.buf); t < malloc; t = origin.malloc - len(origin.buf) {
		malloc -= t
		origin = origin.next
	}
	// Add the buf length of the original node
	// `malloc` is the origin buffer offset that already malloced, the extra buffer should be inserted after that offset.
	malloc += len(origin.buf)

	// Create dataNode and newNode and insert them into the chain
	// dataNode wrap the user buffer extra, and newNode wrap the origin left netpoll buffer
	// - originNode{buf=origin, off=0, malloc=malloc, unmanaged=true} : non-reusable
	// - dataNode{buf=extra, off=0, malloc=len(extra), unmanaged=true} : non-reusable
	// - newNode{buf=origin, off=malloc, malloc=origin.malloc, unmanaged=false} : reusable
	dataNode := newLinkBufferNode(0) // a size-0 node is flagged flagUnmanaged by newLinkBufferNode
	dataNode.buf, dataNode.malloc = extra[:0], n

	if remainLen > 0 {
		// split a single buffer node to originNode and newNode
		newNode := newLinkBufferNode(0)
		newNode.off = malloc
		newNode.buf = origin.buf[:malloc]
		newNode.malloc = origin.malloc
		// newNode takes over responsibility for the shared memory, origin gives it up
		newNode.unsetFlag(flagUnmanaged)
		origin.malloc = malloc
		origin.setFlag(flagUnmanaged)

		// link nodes
		dataNode.next = newNode
		newNode.next = origin.next
		origin.next = dataNode
	} else {
		// link nodes
		dataNode.next = origin.next
		origin.next = dataNode
	}

	// adjust b.write
	// the inserted nodes may now be the tail of the chain
	for b.write.next != nil {
		b.write = b.write.next
	}

	b.mallocSize += n
	return nil
}

// WriteByte implements Writer.
// It reserves one byte via Malloc and stores p in it.
func (b *UnsafeLinkBuffer) WriteByte(p byte) (err error) {
	var slot []byte
	if slot, err = b.Malloc(1); len(slot) == 1 {
		slot[0] = p
	}
	return err
}

// Close will recycle all buffer.
// The length counters are zeroed, every node is released and all node pointers
// of b are cleared.
func (b *UnsafeLinkBuffer) Close() (err error) {
	atomic.StoreInt64(&b.length, 0)
	b.mallocSize = 0
	// first release what has been read plus the caches, then every remaining node
	b.Release()
	cur := b.head
	for cur != nil {
		following := cur.next // read next before the node is released
		cur.Release()
		cur = following
	}
	b.head, b.read, b.flush, b.write = nil, nil, nil, nil
	return nil
}

// ------------------------------------------ implement connection interface ------------------------------------------

// Bytes returns all the readable bytes of this LinkBuffer.
// With a single readable node the node's own memory is returned; otherwise the
// nodes are copied into one newly allocated slice.
func (b *UnsafeLinkBuffer) Bytes() []byte {
	cur, last := b.read, b.flush
	if cur == last {
		return cur.buf[cur.off:]
	}
	out := dirtmake.Bytes(b.Len(), b.Len())
	copied := 0
	for cur != last {
		if cur.Len() > 0 {
			copied += copy(out[copied:], cur.buf[cur.off:])
		}
		cur = cur.next
	}
	// the flush node is the last one holding readable data
	copied += copy(out[copied:], last.buf[last.off:])
	return out[:copied]
}

// GetBytes will read and fill the slice p as much as possible.
// If p is not passed, return all readable bytes.
//
// Each returned element is the unread part of one node, taken without copying;
// the nodes handed out are flagged flagReadExposed. Nodes before flush that hold
// no data are skipped, the flush node is appended whenever a slot is left for it.
// The result is the filled prefix of p (or of the slice allocated here when p is
// empty).
func (b *UnsafeLinkBuffer) GetBytes(p [][]byte) (vs [][]byte) {
	node, flush := b.read, b.flush
	if len(p) == 0 {
		// Reserve one slot per node in [read, flush]. The flush node must be counted
		// as well, otherwise its data is silently dropped whenever every node in
		// front of it is non-empty (and nothing at all is returned when read == flush).
		n := 1
		for ; node != flush; node = node.next {
			n++
		}
		node = b.read
		p = make([][]byte, n)
	}
	var i int
	for i = 0; node != flush && i < len(p); node = node.next {
		if node.Len() > 0 {
			node.setFlag(flagReadExposed)
			p[i] = node.buf[node.off:]
			i++
		}
	}
	if i < len(p) {
		flush.setFlag(flagReadExposed)
		p[i] = flush.buf[flush.off:]
		i++
	}
	return p[:i]
}

// book will grow and malloc buffer to hold data.
//
// bookSize: The size of data that can be read at once.
// maxSize: The maximum size of data between two Release(). In some cases, this can
// guarantee all data allocated in one node to reduce copy.
//
// The returned slice is at most bookSize bytes long and never longer than the
// free space of the write node.
func (b *UnsafeLinkBuffer) book(bookSize, maxSize int) (p []byte) {
	room := cap(b.write.buf) - b.write.malloc
	if room == 0 {
		// the tail node is full: chain a new node requested with maxSize
		tail := newLinkBufferNode(maxSize)
		b.write.next = tail
		b.write = tail
		room = maxSize
	}
	if bookSize < room {
		room = bookSize
	}
	return b.write.Malloc(room)
}

// bookAck will ack the first n malloc bytes and discard the rest.
//
// length: The size of data in inputBuffer. It is used to calculate the maxSize
func (b *UnsafeLinkBuffer) bookAck(n int) (length int, err error) {
	tail := b.write
	// keep n bytes beyond what is already submitted and expose them right away
	tail.malloc = n + len(tail.buf)
	tail.buf = tail.buf[:tail.malloc]
	b.flush = tail

	// re-cal length
	return b.recalLen(n), nil
}

// calcMaxSize will calculate the data size between two Release()
// It sums len(buf) of every node from head up to and including the read node.
func (b *UnsafeLinkBuffer) calcMaxSize() (sum int) {
	node := b.head
	for node != b.read {
		sum += len(node.buf)
		node = node.next
	}
	return sum + len(b.read.buf)
}

// resetTail will reset tail node or add an empty tail node to
// guarantee the tail node is not larger than 8KB
// Nothing happens when maxSize does not exceed pagesize.
func (b *UnsafeLinkBuffer) resetTail(maxSize int) {
	if maxSize > pagesize {
		// append an empty node and make it both the write and the flush node
		tail := newLinkBufferNode(0)
		b.write.next = tail
		b.write = tail
		b.flush = tail
	}
}

// indexByte returns the index of the first instance of c in buffer, or -1 if c is not present in buffer.
// The first skip readable bytes are not searched; the returned index is still
// relative to the start of the readable data.
func (b *UnsafeLinkBuffer) indexByte(c byte, skip int) int {
	size := b.Len()
	if skip >= size {
		return -1
	}
	node := b.read
	for scanned := 0; scanned < size; {
		// never look beyond the readable length, even if the node holds more bytes
		n := node.Len()
		if left := size - scanned; n >= left {
			n = left
		}

		if skip >= n {
			// the whole node lies inside the skipped prefix
			skip -= n
		} else {
			if i := bytes.IndexByte(node.Peek(n)[skip:], c); i >= 0 {
				return scanned + skip + i // past_read + skip_read + index
			}
			skip = 0 // no skip bytes
		}
		node = node.next
		scanned += n
	}
	return -1
}

// ------------------------------------------ private function ------------------------------------------

// recalLen adds delta to the readable length atomically and returns the new length.
func (b *UnsafeLinkBuffer) recalLen(delta int) (length int) {
	consuming := delta < 0
	if consuming && len(b.cachePeek) > 0 {
		// b.cachePeek will contain stale data if we read out even a single byte from buffer,
		// so we need to reset it or the next Peek call will return invalid bytes.
		b.cachePeek = b.cachePeek[:0]
	}
	length = int(atomic.AddInt64(&b.length, int64(delta)))
	return length
}

// growth moves b.write to a node that can take n more bytes, creating a new node
// at the end of the chain when no existing one fits.
func (b *UnsafeLinkBuffer) growth(n int) {
	if n <= 0 {
		return
	}
	for {
		// the memory of an unmanaged node is not allocated by us, so such nodes are skipped
		usable := !b.write.getFlag(flagUnmanaged) && cap(b.write.buf)-b.write.malloc >= n
		if usable {
			return
		}
		if b.write.next == nil {
			fresh := newLinkBufferNode(n)
			b.write.next = fresh
			b.write = fresh
			return
		}
		b.write = b.write.next
	}
}

// isSingleNode determines whether reading needs to cross nodes.
// isSingleNode will move b.read to latest non-empty node if there is a zero-size node
// Must require b.Len() > 0
func (b *UnsafeLinkBuffer) isSingleNode(readN int) (single bool) {
	if readN <= 0 {
		return true
	}
	// drop empty nodes in front of the data, stopping at flush at the latest
	for b.read.Len() == 0 && b.read != b.flush {
		b.read = b.read.next
	}
	return b.read.Len() >= readN
}

// memorySize return the real memory size in bytes the LinkBuffer occupied
// It adds up the capacity of every node buffer, of the cached slices and of the
// peek cache.
func (b *LinkBuffer) memorySize() (bytes int) {
	total := cap(b.cachePeek)
	for _, cached := range b.caches {
		total += cap(cached)
	}
	for node := b.head; node != nil; node = node.next {
		total += cap(node.buf)
	}
	return total
}

// ------------------------------------------ implement link node ------------------------------------------

// newLinkBufferNode create or reuse linkBufferNode.
// Nodes with size <= 0 get no buffer and are flagged flagUnmanaged, which means
// the node.buf is not allocated by this mcache.
// Positive sizes below LinkBufferCap are raised to LinkBufferCap.
func newLinkBufferNode(size int) *linkBufferNode {
	node := linkedPool.Get().(*linkBufferNode)
	// a pooled node may carry old state: reset offsets, reference count and flags
	node.off = 0
	node.malloc = 0
	node.refer = 1
	node.mode = defaultLinkBufferMode
	if size > 0 {
		if size < LinkBufferCap {
			size = LinkBufferCap
		}
		node.buf = malloc(0, size)
		return node
	}
	node.setFlag(flagUnmanaged)
	return node
}

// linkedPool recycles linkBufferNode structs: newLinkBufferNode takes nodes from
// it and linkBufferNode.Release puts them back.
var linkedPool = sync.Pool{
	New: func() interface{} {
		return &linkBufferNode{
			refer: 1, // comes with 1 reference
		}
	},
}

// linkBufferNode is one segment of a LinkBuffer's singly linked chain.
// buf[off:] is the readable part of the node, buf[len(buf):malloc] has been
// handed out for writing but is not submitted yet.
type linkBufferNode struct {
	buf    []byte          // buffer; len(buf) marks the end of the readable data
	off    int             // read-offset into buf
	malloc int             // write-offset: end of the region handed out by Malloc
	refer  int32           // reference count, updated atomically by Refer and Release
	mode   uint8           // bit flags (e.g. flagUnmanaged, flagReadExposed)
	origin *linkBufferNode // the root node of the extends; set on nodes created by Refer
	next   *linkBufferNode // the next node of the linked buffer
}

// Len returns the number of readable bytes left in the node.
func (node *linkBufferNode) Len() (l int) {
	l = len(node.buf) - node.off
	return l
}

// IsEmpty reports whether the read offset has reached the end of the node's data.
func (node *linkBufferNode) IsEmpty() (ok bool) {
	ok = len(node.buf) == node.off
	return ok
}

// Reset empties the node for reuse while keeping its buffer capacity.
// It does nothing for nodes created by Refer (origin set) or nodes whose
// reference count is not exactly 1.
func (node *linkBufferNode) Reset() {
	if node.origin == nil && atomic.LoadInt32(&node.refer) == 1 {
		node.buf = node.buf[:0]
		node.off = 0
		node.malloc = 0
	}
}

// Next consumes n bytes: it advances the read offset and returns the bytes that
// were passed over. The capacity of the result is capped at its length.
func (node *linkBufferNode) Next(n int) (p []byte) {
	begin := node.off
	end := begin + n
	node.off = end
	return node.buf[begin:end:end]
}

// Peek returns the next n readable bytes without moving the read offset.
// The capacity of the result is capped at its length.
func (node *linkBufferNode) Peek(n int) (p []byte) {
	begin, end := node.off, node.off+n
	return node.buf[begin:end:end]
}

// Malloc hands out the next n bytes behind the write offset and advances the
// offset. The capacity of the result is capped at its length.
func (node *linkBufferNode) Malloc(n int) (buf []byte) {
	begin := node.malloc
	end := begin + n
	node.malloc = end
	return node.buf[begin:end:end]
}

// Refer holds a reference count at the same time as Next, and releases the real buffer after Release.
// The node obtained by Refer is read-only.
//
// The new node's origin is the root of the chain of references, i.e. node.origin
// when node itself was obtained by Refer, otherwise node.
func (node *linkBufferNode) Refer(n int) (p *linkBufferNode) {
	root := node.origin
	if root == nil {
		root = node
	}
	p = newLinkBufferNode(0)
	p.buf = node.Next(n)
	p.origin = root
	// the root stays alive until every node referring to it has been released
	atomic.AddInt32(&root.refer, 1)
	return p
}

// Release consists of two parts:
// 1. reduce the reference count of itself and origin.
// 2. recycle the buf when the reference count is 0.
func (node *linkBufferNode) Release() (err error) {
	if root := node.origin; root != nil {
		root.Release()
	}
	// release self
	if atomic.AddInt32(&node.refer, -1) != 0 {
		return nil
	}
	// unmanaged nodes cannot recycle node.buf, other node.buf are recycled to mcache.
	if node.reusable() {
		free(node.buf)
	}
	// clear all pointers before the node goes back to the pool
	node.buf, node.origin, node.next = nil, nil, nil
	linkedPool.Put(node)
	return nil
}

// getFlag reports whether any bit of flag is set in the node's mode.
func (node *linkBufferNode) getFlag(flag uint8) bool {
	return node.mode&flag != 0
}

// setFlag turns on the bits of flag in the node's mode.
func (node *linkBufferNode) setFlag(flag uint8) {
	node.mode = node.mode | flag
}

// unsetFlag clears the bits of flag in the node's mode.
func (node *linkBufferNode) unsetFlag(flag uint8) {
	node.mode = node.mode &^ flag
}

// reusable reports whether the node's buffer memory is owned by the LinkBuffer and can be recycled,
// i.e. whether flagUnmanaged is not set.
// Called during Release to decide if node.buf should be returned to mcache via free.
func (node *linkBufferNode) reusable() bool {
	return !node.getFlag(flagUnmanaged)
}

// readExposed reports whether flagReadExposed is set, i.e. the node's buffer has
// been returned directly to user code via a zero-copy Reader method and may still
// be referenced externally.
func (node *linkBufferNode) readExposed() bool {
	return node.getFlag(flagReadExposed)
}


================================================
FILE: nocopy_linkbuffer_norace.go
================================================
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !race
// +build !race

package netpoll

// LinkBuffer is an alias of UnsafeLinkBuffer in builds without the race detector.
type LinkBuffer = UnsafeLinkBuffer


================================================
FILE: nocopy_linkbuffer_race.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build race
// +build race

package netpoll

import (
	"sync"
)

// LinkBuffer is an alias of SafeLinkBuffer in builds with the race detector enabled.
type LinkBuffer = SafeLinkBuffer

// SafeLinkBuffer is only used in go tests with -race.
// It embeds an UnsafeLinkBuffer and a mutex; the methods declared on it take the
// mutex before delegating to the embedded buffer.
type SafeLinkBuffer struct {
	sync.Mutex
	UnsafeLinkBuffer
}

// ------------------------------------------ implement copy reader ------------------------------------------

// readCopy holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.readCopy.
func (b *SafeLinkBuffer) readCopy(p []byte) int {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.readCopy(p)
}

// ------------------------------------------ implement zero-copy reader ------------------------------------------

// Next implements Reader.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Next.
func (b *SafeLinkBuffer) Next(n int) (p []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Next(n)
}

// Peek implements Reader.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Peek.
func (b *SafeLinkBuffer) Peek(n int) (p []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Peek(n)
}

// Skip implements Reader.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Skip.
func (b *SafeLinkBuffer) Skip(n int) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Skip(n)
}

// Until implements Reader.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Until.
func (b *SafeLinkBuffer) Until(delim byte) (line []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Until(delim)
}

// Release implements Reader.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Release.
func (b *SafeLinkBuffer) Release() (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Release()
}

// ReadString implements Reader.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.ReadString.
func (b *SafeLinkBuffer) ReadString(n int) (s string, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.ReadString(n)
}

// ReadBinary implements Reader.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.ReadBinary.
func (b *SafeLinkBuffer) ReadBinary(n int) (p []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.ReadBinary(n)
}

// ReadByte implements Reader.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.ReadByte.
func (b *SafeLinkBuffer) ReadByte() (p byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.ReadByte()
}

// Slice implements Reader.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Slice.
func (b *SafeLinkBuffer) Slice(n int) (r Reader, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Slice(n)
}

// ------------------------------------------ implement zero-copy writer ------------------------------------------

// Malloc implements Writer.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Malloc.
func (b *SafeLinkBuffer) Malloc(n int) (buf []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Malloc(n)
}

// MallocLen implements Writer.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.MallocLen.
func (b *SafeLinkBuffer) MallocLen() (length int) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.MallocLen()
}

// MallocAck implements Writer.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.MallocAck.
func (b *SafeLinkBuffer) MallocAck(n int) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.MallocAck(n)
}

// Flush implements Writer.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Flush.
func (b *SafeLinkBuffer) Flush() (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Flush()
}

// Append implements Writer.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Append.
func (b *SafeLinkBuffer) Append(w Writer) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Append(w)
}

// WriteBuffer implements Writer.
// It holds the mutex of b (not of buf) for the whole call and delegates to
// UnsafeLinkBuffer.WriteBuffer.
func (b *SafeLinkBuffer) WriteBuffer(buf *LinkBuffer) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteBuffer(buf)
}

// WriteString implements Writer.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.WriteString.
func (b *SafeLinkBuffer) WriteString(s string) (n int, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteString(s)
}

// WriteBinary implements Writer.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.WriteBinary.
func (b *SafeLinkBuffer) WriteBinary(p []byte) (n int, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteBinary(p)
}

// WriteDirect cannot be mixed with WriteString or WriteBinary functions.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.WriteDirect.
func (b *SafeLinkBuffer) WriteDirect(p []byte, remainLen int) error {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteDirect(p, remainLen)
}

// WriteByte implements Writer.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.WriteByte.
func (b *SafeLinkBuffer) WriteByte(p byte) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteByte(p)
}

// Close will recycle all buffer.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Close.
func (b *SafeLinkBuffer) Close() (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Close()
}

// ------------------------------------------ implement connection interface ------------------------------------------

// Bytes returns all the readable bytes of this SafeLinkBuffer.
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.Bytes.
func (b *SafeLinkBuffer) Bytes() []byte {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Bytes()
}

// GetBytes will read and fill the slice p as much as possible.
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.GetBytes.
func (b *SafeLinkBuffer) GetBytes(p [][]byte) (vs [][]byte) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.GetBytes(p)
}

// book will grow and malloc buffer to hold data.
//
// bookSize: The size of data that can be read at once.
// maxSize: The maximum size of data between two Release(). In some cases, this can
// guarantee all data allocated in one node to reduce copy.
//
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.book.
func (b *SafeLinkBuffer) book(bookSize, maxSize int) (p []byte) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.book(bookSize, maxSize)
}

// bookAck will ack the first n malloc bytes and discard the rest.
//
// length: The size of data in inputBuffer. It is used to calculate the maxSize
//
// It holds the mutex for the whole call and delegates to UnsafeLinkBuffer.bookAck.
func (b *SafeLinkBuffer) bookAck(n int) (length int, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.bookAck(n)
}

// calcMaxSize will calculate the data size between two Release()
// It holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.calcMaxSize.
func (b *SafeLinkBuffer) calcMaxSize() (sum int) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.calcMaxSize()
}

// resetTail holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.resetTail.
func (b *SafeLinkBuffer) resetTail(maxSize int) {
	b.Lock()
	defer b.Unlock()
	b.UnsafeLinkBuffer.resetTail(maxSize)
}

// indexByte holds the mutex for the whole call and delegates to
// UnsafeLinkBuffer.indexByte.
func (b *SafeLinkBuffer) indexByte(c byte, skip int) int {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.indexByte(c, skip)
}


================================================
FILE: nocopy_linkbuffer_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"reflect"
	"sync/atomic"
	"testing"
)

// TestLinkBuffer walks one buffer through Malloc/Flush, Next/Peek, Release, book,
// MallocAck, GetBytes and Skip, checking the reported lengths after each step.
// It overrides the package-level LinkBufferCap.
func TestLinkBuffer(t *testing.T) {
	// clean & new
	LinkBufferCap = 128

	buf := NewLinkBuffer()
	Equal(t, buf.Len(), 0)
	MustTrue(t, buf.IsEmpty())

	head := buf.head

	// reading from an empty buffer fails
	p, err := buf.Next(10)
	Equal(t, len(p), 0)
	MustTrue(t, err != nil)

	// malloc'ed bytes are not readable before Flush
	buf.Malloc(128)
	MustTrue(t, buf.IsEmpty())

	p, err = buf.Peek(10)
	Equal(t, len(p), 0)
	MustTrue(t, err != nil)

	buf.Flush()
	Equal(t, buf.Len(), 128)
	MustTrue(t, !buf.IsEmpty())

	p, err = buf.Next(28)
	Equal(t, len(p), 28)
	Equal(t, buf.Len(), 100)
	MustNil(t, err)
	MustTrue(t, buf.read.readExposed()) // single-node Next exposes buffer

	// Peek does not consume
	p, err = buf.Peek(90)
	Equal(t, len(p), 90)
	Equal(t, buf.Len(), 100)
	MustNil(t, err)
	MustTrue(t, buf.read.readExposed()) // single-node Peek exposes buffer

	// Release moves head up to the read node
	read := buf.read
	Equal(t, buf.head, head)
	err = buf.Release()
	MustNil(t, err)
	Equal(t, buf.head, read)

	inputs := buf.book(block1k, block8k)
	Equal(t, len(inputs), block1k)
	Equal(t, buf.Len(), 100)

	buf.MallocAck(block1k)
	Equal(t, buf.Len(), 100)
	Equal(t, buf.MallocLen(), block1k)
	buf.Flush()
	Equal(t, buf.Len(), 100+block1k)
	Equal(t, buf.MallocLen(), 0)

	outputs := buf.GetBytes(make([][]byte, 16))
	Equal(t, len(outputs), 2)

	err = buf.Skip(block1k)
	MustNil(t, err)
	Equal(t, buf.Len(), 100)
}

// TestLinkBufferGetBytes writes ten slices whose sizes grow by a factor of 10 and
// checks that GetBytes(nil) returns slices adding up to the total length written.
func TestLinkBufferGetBytes(t *testing.T) {
	buf := NewLinkBuffer()
	var (
		num         = 10
		b           = 1
		expectedLen = 0
	)
	for i := 0; i < num; i++ {
		expectedLen += b
		n, err := buf.WriteBinary(make([]byte, b))
		MustNil(t, err)
		Equal(t, n, b)
		b *= 10
	}
	buf.Flush()
	Equal(t, int(buf.length), expectedLen)
	bs := buf.GetBytes(nil)
	actualLen := 0
	for i := 0; i < len(bs); i++ {
		actualLen += len(bs[i])
	}
	Equal(t, actualLen, expectedLen)
}

// TestLinkBufferWithInvalid calls the writer and reader methods with n running
// from 0 down to -4 (and with empty/nil payloads) and checks that the buffer
// stays empty. Only MallocAck with a negative n is expected to return an error.
func TestLinkBufferWithInvalid(t *testing.T) {
	// clean & new
	LinkBufferCap = 128

	buf := NewLinkBuffer()
	Equal(t, buf.Len(), 0)
	MustTrue(t, buf.IsEmpty())

	for n := 0; n > -5; n-- {
		// test writer
		p, err := buf.Malloc(n)
		Equal(t, len(p), 0)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		var wn int
		wn, err = buf.WriteString("")
		Equal(t, wn, 0)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		wn, err = buf.WriteBinary(nil)
		Equal(t, wn, 0)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		err = buf.WriteDirect(nil, n)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		// a typed nil *LinkBuffer is accepted and ignored
		var w *LinkBuffer
		err = buf.Append(w)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		err = buf.MallocAck(n)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		if n == 0 {
			MustNil(t, err)
		} else {
			MustTrue(t, err != nil)
		}

		err = buf.Flush()
		MustNil(t, err)

		// test reader
		p, err = buf.Next(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		p, err = buf.Peek(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		err = buf.Skip(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		var s string
		s, err = buf.ReadString(n)
		Equal(t, len(s), 0)
		MustNil(t, err)

		p, err = buf.ReadBinary(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		var r Reader
		r, err = buf.Slice(n)
		Equal(t, r.Len(), 0)
		MustNil(t, err)

		err = buf.Release()
		MustNil(t, err)
	}
}

// TestLinkBufferMultiNode uses a tiny LinkBufferCap so that data spreads over
// several nodes, then checks the node offsets and flags after Malloc, Flush,
// Next, Peek (including the cachePeek growth cases), book, MallocAck and Skip.
func TestLinkBufferMultiNode(t *testing.T) {
	// clean & new
	LinkBufferCap = 8

	buf := NewLinkBuffer()
	Equal(t, buf.Len(), 0)
	MustTrue(t, buf.IsEmpty())
	var p []byte

	p, _ = buf.Malloc(15)
	for i := 0; i < len(p); i++ { // updates p[0] - p[14] to 0 - 14
		p[i] = byte(i)
	}
	Equal(t, len(p), 15)
	MustTrue(t, buf.read == buf.flush)
	Equal(t, buf.read.off, 0)
	Equal(t, buf.read.malloc, 0)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 15)
	Equal(t, cap(buf.write.buf), 16) // mcache up-aligned to the power of 2

	// 7 more bytes do not fit into the remaining byte of the 16-byte node
	p, _ = buf.Malloc(7)
	for i := 0; i < len(p); i++ { // updates p[0] - p[6] to 15 - 21
		p[i] = byte(i + 15)
	}
	Equal(t, len(p), 7)
	MustTrue(t, buf.read == buf.flush)
	Equal(t, buf.read.off, 0)
	Equal(t, buf.read.malloc, 0)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 7)
	Equal(t, cap(buf.write.buf), LinkBufferCap)

	buf.Flush()
	MustTrue(t, buf.read != buf.flush)
	MustTrue(t, buf.flush == buf.write)
	Equal(t, buf.read.off, 0)
	Equal(t, len(buf.read.buf), 0)
	Equal(t, buf.read.next.off, 0)
	Equal(t, len(buf.read.next.buf), 15)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	Equal(t, len(buf.flush.buf), 7)

	p, _ = buf.Next(13)
	Equal(t, len(p), 13)
	Equal(t, p[0], byte(0))
	Equal(t, p[12], byte(12))
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 13)
	Equal(t, buf.read.Len(), 2)
	Equal(t, buf.read.next.Len(), 7)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	MustTrue(t, buf.read.readExposed())   // single-node Next
	MustTrue(t, !buf.flush.readExposed()) // not touched yet

	// Peek
	p, _ = buf.Peek(4)
	Equal(t, len(p), 4)
	Equal(t, p[0], byte(13))
	Equal(t, p[1], byte(14))
	Equal(t, p[2], byte(15))
	Equal(t, p[3], byte(16))
	Equal(t, len(buf.cachePeek), 4)
	p, _ = buf.Peek(3) // case: smaller than the last call
	Equal(t, len(p), 3)
	Equal(t, p[0], byte(13))
	Equal(t, p[2], byte(15))
	Equal(t, len(buf.cachePeek), 4)
	p, _ = buf.Peek(5) // case: Peek than the max call, and cap(buf.cachePeek) < n
	Equal(t, len(p), 5)
	Equal(t, p[0], byte(13))
	Equal(t, p[4], byte(17))
	Equal(t, len(buf.cachePeek), 5)
	p, _ = buf.Peek(6) // case: Peek than the last call, and cap(buf.cachePeek) > n
	Equal(t, len(p), 6)
	Equal(t, p[0], byte(13))
	Equal(t, p[5], byte(18))
	Equal(t, len(buf.cachePeek), 6)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 13)
	Equal(t, buf.read.Len(), 2)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	MustTrue(t, !buf.flush.readExposed()) // multi-node Peek copies, doesn't expose
	// Peek ends

	// the first book only gets the single byte left in the 8-byte tail node
	buf.book(block8k, block8k)
	MustTrue(t, buf.flush == buf.write)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 8)
	Equal(t, buf.flush.Len(), 7)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 8)
	Equal(t, buf.write.Len(), 7)

	// the tail is full now, so the second book chains a new node
	buf.book(block8k, block8k)
	MustTrue(t, buf.flush != buf.write)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 8)
	Equal(t, buf.flush.Len(), 7)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 8192)
	Equal(t, buf.write.Len(), 0)

	// 5 acked bytes = 1 in the old tail + 4 in the new node
	buf.MallocAck(5)
	MustTrue(t, buf.flush != buf.write)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 4)
	Equal(t, buf.write.Len(), 0)
	MustTrue(t, buf.write.next == nil)
	buf.Flush()

	p, _ = buf.Next(8)
	Equal(t, len(p), 8)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 6)
	Equal(t, buf.read.Len(), 2)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 4)
	Equal(t, buf.flush.Len(), 4)

	err := buf.Skip(3)
	MustNil(t, err)
	MustTrue(t, buf.read == buf.flush)
	Equal(t, buf.read.off, 1)
	Equal(t, buf.read.Len(), 3)
	Equal(t, buf.flush.malloc, 4)
}

// TestLinkBufferRefer moves the content of one LinkBuffer into another with
// WriteBuffer, carves a reader out of the destination with Slice, and then
// checks the node offsets, lengths and head pointers of both buffers before
// and after each of them calls Release.
func TestLinkBufferRefer(t *testing.T) {
	// clean & new
	LinkBufferCap = 8

	// source buffer: block8k+7 readable bytes after Flush (asserted below)
	wbuf := NewLinkBuffer()
	wbuf.book(block8k, block8k)
	wbuf.Malloc(7)
	wbuf.Flush()
	Equal(t, wbuf.Len(), block8k+7)

	buf := NewLinkBuffer()
	var p []byte

	// writev
	buf.WriteBuffer(wbuf)
	buf.Flush()
	Equal(t, buf.Len(), block8k+7)

	// consume 5 bytes from the first node; the read and flush nodes stay distinct
	p, _ = buf.Next(5)
	Equal(t, len(p), 5)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 5)
	Equal(t, buf.read.Len(), block8k-5)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	Equal(t, cap(buf.flush.buf), 8)
	MustTrue(t, buf.read.readExposed()) // single-node Next

	// readv
	// the type assertion uses the comma-ok form, so it is safe to run before
	// the error from Slice is checked
	_rbuf, err := buf.Slice(4)
	rbuf, ok := _rbuf.(*LinkBuffer)
	MustNil(t, err)
	MustTrue(t, ok)
	Equal(t, rbuf.Len(), 4)
	MustTrue(t, rbuf.read != rbuf.flush)
	Equal(t, rbuf.read.off, 0)
	Equal(t, rbuf.read.Len(), 4)

	MustTrue(t, buf.head != buf.read) // Slice will Release
	MustTrue(t, rbuf.read != buf.read)
	Equal(t, buf.Len(), block8k-2)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 9)
	Equal(t, buf.read.malloc, block8k)

	// release
	// releasing the sliced reader must move only its own head
	node1 := rbuf.head
	node2 := buf.head
	rbuf.Skip(rbuf.Len())
	err = rbuf.Release()
	MustNil(t, err)
	MustTrue(t, rbuf.head != node1)
	MustTrue(t, buf.head == node2)

	// releasing the parent advances its head to the current read node
	err = buf.Release()
	MustNil(t, err)
	MustTrue(t, buf.head != node2)
	MustTrue(t, buf.head == buf.read)
	Equal(t, buf.read.off, 9)
	Equal(t, buf.read.malloc, block8k)
	Equal(t, buf.read.refer, int32(1))
	Equal(t, buf.read.Len(), block8k-9)
}

// TestLinkBufferResetTail verifies that a reader obtained through Slice still
// yields the byte it captured after the parent buffer calls resetTail and
// writes a different byte.
func TestLinkBufferResetTail(t *testing.T) {
	want := byte(1)

	LinkBufferCap = 8
	lb := NewLinkBuffer()

	// step 1: write a single byte and hand it out through a sliced reader
	lb.WriteByte(want)
	lb.Flush()
	sliced, _ := lb.Slice(1)
	t.Logf("1: %x\n", lb.flush.buf)

	// step 2: reset the tail of the parent, then write another byte
	lb.resetTail(LinkBufferCap)
	lb.WriteByte(byte(2))
	t.Logf("2: %x\n", lb.flush.buf)

	// the sliced reader must still observe the byte written in step 1
	got, _ := sliced.ReadByte()
	Equal(t, got, want)
}

// TestLinkBufferWriteBuffer appends two one-byte buffers to a third with
// WriteBuffer and checks that the bytes come out in append order.
func TestLinkBufferWriteBuffer(t *testing.T) {
	dst := NewLinkBuffer()

	// first source: a flushed buffer containing the single byte 2
	first := NewLinkBuffer()
	p, _ := first.Malloc(1)
	p[0] = 2
	first.Flush()

	// second source: a flushed buffer containing the single byte 3
	second := NewLinkBuffer()
	q, _ := second.Malloc(1)
	q[0] = 3
	second.Flush()

	dst.WriteBuffer(first)
	dst.WriteBuffer(second)
	dst.Flush()

	want := []byte{2, 3}
	MustTrue(t, bytes.Equal(dst.Bytes(), want))
}

// TestLinkBufferCheckSingleNode exercises isSingleNode on a buffer created
// with a block4k initial size that then receives a block8k Malloc.
func TestLinkBufferCheckSingleNode(t *testing.T) {
	lb := NewLinkBuffer(block4k)
	_, err := lb.Malloc(block8k)
	MustNil(t, err)
	lb.Flush()
	// before the call the current read node reports no readable bytes
	MustTrue(t, lb.read.Len() == 0)

	single := lb.isSingleNode(block8k)
	MustTrue(t, single)
	// after the call the read node holds the whole block8k payload
	MustTrue(t, lb.read.Len() == block8k)

	single = lb.isSingleNode(block8k + 1)
	MustTrue(t, !single)

	// cross node malloc, but b.read.Len() still == 0
	lb = NewLinkBuffer(block4k)
	_, err = lb.Malloc(block8k)
	MustNil(t, err)
	// nothing has been acked or flushed yet; the read functions call
	// isSingleNode internally, so it only has to return without panicking
	lb.isSingleNode(1)
}

// TestLinkBufferWriteMultiFlush checks that calling Flush repeatedly, with or
// without new data in between, never changes the readable bytes.
func TestLinkBufferWriteMultiFlush(t *testing.T) {
	lb := NewLinkBuffer()
	first, _ := lb.Malloc(4)
	first[0] = 1
	first[2] = 2

	// two flushes in a row: the second has nothing to publish
	MustNil(t, lb.Flush())
	MustNil(t, lb.Flush())
	MustTrue(t, lb.Bytes()[0] == 1)
	MustTrue(t, len(lb.Bytes()) == 4)

	// skipping moves the read position; a further flush must not undo it
	MustNil(t, lb.Skip(2))
	MustTrue(t, lb.Bytes()[0] == 2)
	MustTrue(t, len(lb.Bytes()) == 2)
	MustNil(t, lb.Flush())
	MustTrue(t, lb.Bytes()[0] == 2)
	MustTrue(t, len(lb.Bytes()) == 2)

	// new data becomes readable after the next flush, behind the old data
	second, _ := lb.Malloc(2)
	second[0] = 3
	MustNil(t, lb.Flush())
	MustTrue(t, lb.Bytes()[0] == 2)
	MustTrue(t, lb.Bytes()[2] == 3)
	MustTrue(t, len(lb.Bytes()) == 4)
}

// TestLinkBufferWriteBinary makes sure that after WriteBinary is given a
// slice whose capacity exceeds its length, later writes never land in the
// spare capacity of the caller's backing array.
func TestLinkBufferWriteBinary(t *testing.T) {
	// clean & new
	LinkBufferCap = 8

	// backing array: cap=16, of which only the first 9 bytes are written
	backing := make([]byte, 16)
	lb := NewLinkBuffer()
	lb.WriteBinary(backing[:9])
	lb.Flush()

	// backing[9:] must not be retained by the buffer: neither a following
	// WriteBinary nor a following Malloc may start writing there
	lb.WriteBinary([]byte{1})
	Equal(t, backing[9], byte(0))

	p, err := lb.Malloc(1)
	MustNil(t, err)
	p[0] = 2
	lb.Flush()
	Equal(t, backing[9], byte(0))
}

// TestLinkBufferWriteDirect interleaves byte writes into a Malloc'ed region
// with WriteDirect calls and checks the byte order of the flushed result.
func TestLinkBufferWriteDirect(t *testing.T) {
	// clean & new
	LinkBufferCap = 32

	lb := NewLinkBuffer()
	region, _ := lb.Malloc(32)
	region[0] = 'a'
	region[1] = 'b'
	lb.WriteDirect([]byte("cdef"), 30)
	region[2] = 'g'
	lb.WriteDirect([]byte("hijkl"), 29)
	region[3] = 'm'
	lb.WriteDirect([]byte("nopqrst"), 28)
	region[4] = 'u'
	lb.WriteDirect([]byte("vwxyz"), 27)
	copy(region[5:], "abcdefghijklmnopqrstuvwxyza")
	lb.WriteDirect([]byte("abcdefghijklmnopqrstuvwxyz"), 0)
	lb.Flush()

	got := lb.Bytes()
	want := "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzaabcdefghijklmnopqrstuvwxyz"
	// compare byte by byte, reporting every mismatching position
	for idx := 0; idx < len(want); idx++ {
		if got[idx] == want[idx] {
			continue
		}
		t.Error("not equal!")
	}
}

// TestLinkBufferBufferMode checks the flags of freshly created nodes: a node
// requested with size 0 carries flagUnmanaged and is not reusable, a node
// requested with size 1 is the opposite, and neither starts out read-exposed.
func TestLinkBufferBufferMode(t *testing.T) {
	// size 0
	zeroSized := newLinkBufferNode(0)
	MustTrue(t, zeroSized.getFlag(flagUnmanaged))
	MustTrue(t, !zeroSized.reusable())
	MustTrue(t, !zeroSized.readExposed())

	// size 1
	sized := newLinkBufferNode(1)
	MustTrue(t, !sized.getFlag(flagUnmanaged))
	MustTrue(t, sized.reusable())
	MustTrue(t, !sized.readExposed())
}

// TestLinkBufferReadCopy covers readCopy, which copies readable bytes into a
// caller-supplied slice and returns the number of bytes copied. The subtests
// check the copied data, the remaining length, and how the node chain (head,
// read, next) looks afterwards depending on whether nodes were previously
// exposed through Peek.
func TestLinkBufferReadCopy(t *testing.T) {
	// data fits in one node and is only partially consumed
	t.Run("SingleNode", func(t *testing.T) {
		LinkBufferCap = 128
		buf := NewLinkBuffer(128)
		p, _ := buf.Malloc(16)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()

		dst := make([]byte, 10)
		n := buf.readCopy(dst)
		Equal(t, n, 10)
		for i := 0; i < 10; i++ {
			Equal(t, dst[i], byte(i))
		}
		Equal(t, buf.Len(), 6)
		// readCopy must not set readExposed
		MustTrue(t, !buf.read.readExposed())
	})

	// data spans two 8-byte nodes and is consumed completely
	t.Run("MultiNode", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		p, _ := buf.Malloc(8)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()
		p, _ = buf.Malloc(8)
		for i := range p {
			p[i] = byte(i + 8)
		}
		buf.Flush()

		dst := make([]byte, 16)
		n := buf.readCopy(dst)
		Equal(t, n, 16)
		for i := 0; i < 16; i++ {
			Equal(t, dst[i], byte(i))
		}
		Equal(t, buf.Len(), 0)
	})

	// destination is larger than the readable data
	t.Run("PartialRead", func(t *testing.T) {
		LinkBufferCap = 128
		buf := NewLinkBuffer(128)
		p, _ := buf.Malloc(4)
		for i := range p {
			p[i] = byte(i + 1)
		}
		buf.Flush()

		// read more than available
		dst := make([]byte, 16)
		n := buf.readCopy(dst)
		Equal(t, n, 4)
		Equal(t, dst[0], byte(1))
		Equal(t, dst[3], byte(4))
		Equal(t, buf.Len(), 0)
	})

	t.Run("ReleasesNonExposedNodes", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		buf.Malloc(8)
		buf.Flush()
		buf.Malloc(8)
		buf.Flush()
		node1 := buf.read

		dst := make([]byte, 16)
		buf.readCopy(dst)
		// node1 was not exposed, should be released (head advanced past it)
		MustTrue(t, buf.head != node1)
	})

	t.Run("SkipsExposedNodes", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		p, _ := buf.Malloc(8)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()
		buf.Malloc(8)
		buf.Flush()

		// expose node1 via Peek
		buf.Peek(4)
		node1 := buf.read
		MustTrue(t, node1.readExposed())

		// readCopy past both nodes
		dst := make([]byte, 16)
		n := buf.readCopy(dst)
		Equal(t, n, 16)
		Equal(t, dst[0], byte(0))
		// head should stay at exposed node1
		Equal(t, buf.head, node1)

		// subsequent Release frees the exposed node
		buf.Release()
		MustTrue(t, buf.head != node1)
	})

	// [exposed/consumed] -> [not-exposed/consumed] -> [partial-consumed/read]
	t.Run("ExposedThenNonExposedThenPartial", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		// node1: 8 bytes
		p, _ := buf.Malloc(8)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()
		// node2: 8 bytes
		p, _ = buf.Malloc(8)
		for i := range p {
			p[i] = byte(i + 8)
		}
		buf.Flush()
		// node3: 8 bytes
		p, _ = buf.Malloc(8)
		for i := range p {
			p[i] = byte(i + 16)
		}
		buf.Flush()

		// expose node1 via Peek
		buf.Peek(4)
		node1 := buf.read
		node2 := node1.next
		MustTrue(t, node1.readExposed())
		MustTrue(t, !node2.readExposed())

		// readCopy 20 bytes: consumes node1(8) + node2(8) + 4 from node3
		dst := make([]byte, 20)
		n := buf.readCopy(dst)
		Equal(t, n, 20)
		for i := 0; i < 20; i++ {
			Equal(t, dst[i], byte(i))
		}
		Equal(t, buf.Len(), 4)

		// head should be node1 (exposed, kept in chain)
		Equal(t, buf.head, node1)
		// node2 was released, node1.next should skip to read (node3)
		Equal(t, node1.next, buf.read)

		// subsequent Release frees the exposed node
		buf.Release()
		MustTrue(t, buf.head == buf.read)
	})
}

// BenchmarkLinkBufferConcurrentReadWrite runs one writer and one reader
// against the same LinkBuffer. The writer publishes 70 bytes per iteration
// (seven copies of writeMsg); the reader consumes them as ten 7-byte chunks
// and compares each chunk with the matching entry of readMsg.
func BenchmarkLinkBufferConcurrentReadWrite(b *testing.B) {
	b.StopTimer()

	buf := NewLinkBuffer()
	// rwTag hands each worker goroutine a unique number; 1 becomes the
	// writer, 2 becomes the reader.
	var rwTag uint32
	// readMsg is the 70-byte stream "0123456789"x7 split into 7-byte pieces.
	readMsg := []string{
		"0123456",
		"7890123",
		"4567890",
		"1234567",
		"8901234",
		"5678901",
		"2345678",
		"9012345",
		"6789012",
		"3456789",
	}
	writeMsg := []byte("0123456789")

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	// NOTE(review): per the testing package docs SetParallelism(p) makes
	// RunParallel start p*GOMAXPROCS goroutines, not p. Workers whose tag is
	// neither 1 nor 2 match no case below and return immediately.
	b.SetParallelism(2) // one read one write
	b.RunParallel(func(pb *testing.PB) {
		switch atomic.AddUint32(&rwTag, 1) {
		case 1:
			// 1 is write
			for pb.Next() {
				// reserve 80 bytes but only fill and acknowledge 70 of them
				p, err := buf.Malloc(80)
				if err != nil {
					panic(fmt.Sprintf("malloc error %s", err.Error()))
				}
				for i := 0; i < 7; i++ {
					copy(p[i*10:i*10+10], writeMsg)
				}
				buf.MallocAck(70)
				buf.Flush()
			}
		case 2:
			// 2 is read
			for pb.Next() {
				// i only advances on a successful Next, so this loop spins
				// until the writer has published enough data
				for i := 0; i < 10; {
					p, err := buf.Next(7)
					if err == nil {
						if string(p) != readMsg[i] {
							panic(fmt.Sprintf("NEXT p[%s] != msg[%s]", p, readMsg[i]))
						}
					} else {
						// No read data, wait for write
						continue
					}
					i++
				}
				buf.Release()
			}
		}
	})
}

// TestUnsafeStringToSlice checks that the bytes produced by
// unsafeStringToSlice keep the original content after the source variable is
// rebound to another string.
func TestUnsafeStringToSlice(t *testing.T) {
	src := "hello world"
	view := unsafeStringToSlice(src)

	// rebinding src must not affect the previously converted bytes
	src = "hi, boy"
	_ = src

	Equal(t, string(view), "hello world")
}

// TestLinkBufferIndexByte runs a writer goroutine that appends 1002-byte
// chunks, each with '\n' at offsets 500 and 1001, while the test goroutine
// searches every published chunk with indexByte from three start positions.
func TestLinkBufferIndexByte(t *testing.T) {
	// clean & new
	LinkBufferCap = 128
	loopSize := 1000
	trigger := make(chan struct{}, 16)

	lb := NewLinkBuffer()
	empty := make([]byte, 1002)
	go func() {
		// Closing the channel lets the reader below notice an early exit of
		// the writer instead of blocking forever on the receive.
		defer close(trigger)
		for i := 0; i < loopSize; i++ {
			buf, err := lb.Malloc(1002)
			// Check the error before touching buf. FailNow-style helpers must
			// not run outside the test goroutine, so report and stop instead.
			if err != nil {
				t.Errorf("malloc error: %v", err)
				return
			}
			// need clear buffer
			copy(buf, empty)
			buf[500] = '\n'
			buf[1001] = '\n'
			lb.Flush()
			trigger <- struct{}{}
		}
	}()

	for i := 0; i < loopSize; i++ {
		if _, ok := <-trigger; !ok {
			t.Fatal("writer goroutine stopped before producing all chunks")
		}
		// last is the absolute offset at which chunk i starts
		last := i * 1002
		n := lb.indexByte('\n', 0+last)
		Equal(t, n, 500+last)
		// a search starting exactly on the delimiter finds that delimiter
		n = lb.indexByte('\n', 500+last)
		Equal(t, n, 500+last)
		n = lb.indexByte('\n', 501+last)
		Equal(t, n, 1001+last)
	}
}

// TestLinkBufferPeekOutOfMemory guards against Peek growing the buffer's
// memory footprint: after filling bufNodes nodes it peeks repeatedly, both
// within one node and across a node boundary, and requires memorySize to
// stay constant between calls.
func TestLinkBufferPeekOutOfMemory(t *testing.T) {
	bufCap := 1024 * 8
	bufNodes := 100
	magicN := uint64(2024)
	buf := NewLinkBuffer(bufCap)
	MustTrue(t, buf.IsEmpty())
	Equal(t, cap(buf.write.buf), bufCap)
	Equal(t, buf.memorySize(), bufCap)

	var p []byte
	var err error
	// write data that cross multi nodes
	for n := 0; n < bufNodes; n++ {
		p, err = buf.Malloc(bufCap)
		MustNil(t, err)
		Equal(t, len(p), bufCap)
		// stamp the head of every chunk so peeked data can be recognised
		binary.BigEndian.PutUint64(p, magicN)
	}
	Equal(t, buf.MallocLen(), bufCap*bufNodes)
	buf.Flush()
	Equal(t, buf.MallocLen(), 0)

	// peek data that in single node
	for i := 0; i < 10; i++ {
		p, err = buf.Peek(bufCap)
		// validate err and length before decoding: Uint64 panics on a slice
		// shorter than 8 bytes, which would hide the real failure
		MustNil(t, err)
		Equal(t, len(p), bufCap)
		Equal(t, binary.BigEndian.Uint64(p), magicN)
		Equal(t, buf.memorySize(), bufCap*bufNodes)
	}

	// peek data that cross nodes
	memorySize := 0
	for i := 0; i < 1024; i++ {
		p, err = buf.Peek(bufCap + 1)
		MustNil(t, err)
		Equal(t, len(p), bufCap+1)
		Equal(t, binary.BigEndian.Uint64(p), magicN)
		if memorySize == 0 {
			// the first cross-node Peek sets the baseline
			memorySize = buf.memorySize()
			t.Logf("after Peek: memorySize=%d", memorySize)
		} else {
			// every later Peek must leave the footprint unchanged
			Equal(t, buf.memorySize(), memorySize)
		}
	}
}

// TestMallocAck writes a 4-byte head, a WriteDirect payload and a 4-byte tail
// and checks that Next returns them concatenated in that order.
func TestMallocAck(t *testing.T) {
	payloadLen := 1024 * 7
	head := []byte{1, 2, 3, 4}
	tail := []byte{5, 6, 7, 8}
	lb := NewLinkBuffer(0)

	// reserve room for head+payload, but only the head is copied in by hand
	p, err := lb.Malloc(4 + payloadLen)
	MustNil(t, err)
	copy(p[:4], head)

	// the payload itself is attached through WriteDirect
	payload := make([]byte, payloadLen)
	err = lb.WriteDirect(payload, payloadLen)
	MustNil(t, err)

	err = lb.MallocAck(4 + payloadLen)
	MustNil(t, err)
	lb.Flush()

	p, err = lb.Malloc(4)
	MustNil(t, err)
	copy(p[:4], tail)
	lb.Flush()

	p, err = lb.Next(8 + payloadLen)
	MustNil(t, err)

	want := append(append(head, payload...), tail...)
	MustTrue(t, reflect.DeepEqual(p, want))
}

// BenchmarkStringToSliceByte measures unsafeStringToSlice on a short string.
func BenchmarkStringToSliceByte(b *testing.B) {
	b.StopTimer()
	input := "hello world"
	var out []byte
	if false {
		b.Logf("bs = %s", out)
	}

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		out = unsafeStringToSlice(input)
	}
	// keep the result alive so the assignment above is not dead code
	_ = out
}

// BenchmarkStringToCopy measures the built-in copying []byte(string)
// conversion on a short string.
func BenchmarkStringToCopy(b *testing.B) {
	b.StopTimer()
	input := "hello world"
	var out []byte
	b.Logf("bs = %s", out)

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		out = []byte(input)
	}
	// keep the result alive so the assignment above is not dead code
	_ = out
}

// BenchmarkLinkBufferPoolGet measures a newLinkBufferNode(0) / Release
// round trip under heavy parallelism.
func BenchmarkLinkBufferPoolGet(b *testing.B) {
	// benchmark
	b.ReportAllocs()
	b.SetParallelism(100)
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		// v is local to each worker. A single variable shared by all the
		// goroutines RunParallel starts would be written concurrently
		// without synchronization, which is a data race.
		var v *linkBufferNode
		for pb.Next() {
			v = newLinkBufferNode(0)
			v.Release()
		}
	})
}

// BenchmarkCopyString measures copy() from a 128 KiB source into a 1 KiB
// per-worker destination under heavy parallelism.
func BenchmarkCopyString(b *testing.B) {
	src := make([]byte, 128*1024)

	// benchmark
	b.ReportAllocs()
	b.SetParallelism(100)
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		// each worker owns its destination, only the source is shared
		dst := make([]byte, 1024)
		for pb.Next() {
			copy(dst, src)
		}
	})
}

// BenchmarkLinkBufferNoCopyRead writes totalSize bytes in one Malloc and reads
// them back with ReadBinary in chunks that double from minSize to maxSize,
// using one private LinkBuffer per worker.
func BenchmarkLinkBufferNoCopyRead(b *testing.B) {
	totalSize := 0
	minSize := 32
	maxSize := minSize << 9
	// totalSize is the sum of all chunk sizes read back below
	for size := minSize; size <= maxSize; size = size << 1 {
		totalSize += size
	}
	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		// Failures are reported with Errorf followed by return: the body runs
		// on goroutines created by RunParallel, and Fatal/FailNow may only be
		// called from the goroutine running the benchmark function.
		buffer := NewLinkBuffer(pagesize)
		for pb.Next() {
			buf, err := buffer.Malloc(totalSize)
			if len(buf) != totalSize || err != nil {
				b.Errorf("Malloc(%d): len=%d err=%v", totalSize, len(buf), err)
				return
			}
			err = buffer.MallocAck(totalSize)
			if err != nil {
				b.Errorf("MallocAck(%d): %v", totalSize, err)
				return
			}
			err = buffer.Flush()
			if err != nil {
				b.Errorf("Flush: %v", err)
				return
			}

			for size := minSize; size <= maxSize; size = size << 1 {
				buf, err = buffer.ReadBinary(size)
				if len(buf) != size || err != nil {
					b.Errorf("ReadBinary(%d): len=%d err=%v", size, len(buf), err)
					return
				}
			}
			// buffer.Release will not reuse memory since we use no copy mode here
			err = buffer.Release()
			if err != nil {
				b.Errorf("Release: %v", err)
				return
			}
		}
	})
}


================================================
FILE: nocopy_readwriter.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"fmt"
	"io"
)

// maxReadCycle bounds how many times a single zcReader.fill call may invoke
// Read on the underlying io.Reader.
const maxReadCycle = 16

// newZCReader wraps r in a zcReader backed by a fresh LinkBuffer.
func newZCReader(r io.Reader) *zcReader {
	zr := new(zcReader)
	zr.r = r
	zr.buf = NewLinkBuffer()
	return zr
}

// compile-time check that *zcReader satisfies Reader.
var _ Reader = &zcReader{}

// zcReader implements Reader.
//
// It adapts an io.Reader: data is pulled from r into buf on demand (see
// waitRead and fill) and then served from buf.
type zcReader struct {
	r   io.Reader   // underlying data source
	buf *LinkBuffer // staging buffer the Reader methods are served from
}

// Next implements Reader.
// It first waits until n bytes are buffered, then hands the request to the
// internal buffer; a wait error is returned with a nil slice.
func (r *zcReader) Next(n int) (p []byte, err error) {
	if err = r.waitRead(n); err == nil {
		p, err = r.buf.Next(n)
	}
	return p, err
}

// Peek implements Reader.
// It first waits until n bytes are buffered, then hands the request to the
// internal buffer; a wait error is returned with a nil slice.
func (r *zcReader) Peek(n int) (buf []byte, err error) {
	if err = r.waitRead(n); err == nil {
		buf, err = r.buf.Peek(n)
	}
	return buf, err
}

// Skip implements Reader.
// It first waits until n bytes are buffered, then skips them in the internal
// buffer.
func (r *zcReader) Skip(n int) (err error) {
	if err = r.waitRead(n); err == nil {
		err = r.buf.Skip(n)
	}
	return err
}

// Release implements Reader by releasing the internal buffer.
func (r *zcReader) Release() (err error) {
	err = r.buf.Release()
	return err
}

// Slice implements Reader.
// It first waits until n bytes are buffered, then slices them off the internal
// buffer; a wait error is returned with a nil Reader.
func (r *zcReader) Slice(n int) (reader Reader, err error) {
	if err = r.waitRead(n); err == nil {
		reader, err = r.buf.Slice(n)
	}
	return reader, err
}

// Len implements Reader.
// It reports the length of the internal buffer only; it never reads from the
// underlying io.Reader.
func (r *zcReader) Len() (length int) {
	length = r.buf.Len()
	return length
}

// ReadString implements Reader.
// It first waits until n bytes are buffered, then reads them from the internal
// buffer; a wait error is returned with an empty string.
func (r *zcReader) ReadString(n int) (s string, err error) {
	if err = r.waitRead(n); err == nil {
		s, err = r.buf.ReadString(n)
	}
	return s, err
}

// ReadBinary implements Reader.
// It first waits until n bytes are buffered, then reads them from the internal
// buffer; a wait error is returned with a nil slice.
func (r *zcReader) ReadBinary(n int) (p []byte, err error) {
	if err = r.waitRead(n); err == nil {
		p, err = r.buf.ReadBinary(n)
	}
	return p, err
}

// ReadByte implements Reader.
// It first waits until one byte is buffered, then reads it from the internal
// buffer; a wait error is returned with a zero byte.
func (r *zcReader) ReadByte() (b byte, err error) {
	if err = r.waitRead(1); err == nil {
		b, err = r.buf.ReadByte()
	}
	return b, err
}

// Until implements Reader.
// It delegates straight to the internal buffer without calling waitRead
// first, so this method itself pulls nothing from the underlying io.Reader.
func (r *zcReader) Until(delim byte) (line []byte, err error) {
	line, err = r.buf.Until(delim)
	return line, err
}

// waitRead blocks until the internal buffer holds at least n bytes, calling
// fill to pull more data from the underlying io.Reader as long as it does not.
//
// An io.EOF reported by fill is translated to Exception(ErrEOF, ""); any other
// error is returned unchanged. io.Reader allows the final bytes to be
// delivered together with io.EOF, and fill publishes those bytes before
// returning the error. So when that last read already brought the buffer up
// to n bytes the request is satisfied and nil is returned. A later call that
// needs more data will see the EOF again from the underlying reader.
func (r *zcReader) waitRead(n int) (err error) {
	for r.buf.Len() < n {
		err = r.fill(n)
		if err == nil {
			continue
		}
		if err == io.EOF {
			if r.buf.Len() >= n {
				// data and EOF arrived in the same Read: serve the data
				return nil
			}
			err = Exception(ErrEOF, "")
		}
		return err
	}
	return nil
}

// fill reads from the underlying io.Reader into the internal buffer until the
// buffer holds at least n bytes, an error occurs, or maxReadCycle reads have
// been issued. It returns nil when it stops without an error, even if fewer
// than n bytes are buffered; the caller is expected to call it again.
func (r *zcReader) fill(n int) (err error) {
	for cycle := 0; cycle < maxReadCycle; cycle++ {
		if r.buf.Len() >= n {
			break
		}
		chunk, mallocErr := r.buf.Malloc(block4k)
		if mallocErr != nil {
			return mallocErr
		}
		got, readErr := r.r.Read(chunk)
		if got < 0 {
			// a negative count violates the io.Reader contract; turn it into
			// an error unless the reader already reported one
			if readErr == nil {
				readErr = fmt.Errorf("zcReader fill negative count[%d]", got)
			}
			got = 0
		}
		// publish whatever was read before looking at the error, so bytes
		// delivered together with an error are not lost
		r.buf.MallocAck(got)
		r.buf.Flush()
		if readErr != nil {
			return readErr
		}
	}
	return nil
}

// newZCWriter wraps w in a zcWriter backed by a fresh LinkBuffer.
func newZCWriter(w io.Writer) *zcWriter {
	zw := new(zcWriter)
	zw.w = w
	zw.buf = NewLinkBuffer()
	return zw
}

// compile-time check that *zcWriter satisfies Writer.
var _ Writer = &zcWriter{}

// zcWriter implements Writer.
//
// It adapts an io.Writer: data is staged in buf through the Writer methods
// and pushed to w when Flush is called.
type zcWriter struct {
	w   io.Writer   // destination written to by Flush
	buf *LinkBuffer // staging buffer filled by the Writer methods
}

// Malloc implements Writer by delegating to the internal buffer.
func (w *zcWriter) Malloc(n int) (buf []byte, err error) {
	buf, err = w.buf.Malloc(n)
	return buf, err
}

// MallocLen implements Writer by delegating to the internal buffer.
func (w *zcWriter) MallocLen() (length int) {
	length = w.buf.MallocLen()
	return length
}

// Flush implements Writer.
// It flushes the internal buffer, writes its readable bytes to the underlying
// io.Writer in one call, and drops however many bytes that call accepted.
// The error returned is the one from the underlying Write.
func (w *zcWriter) Flush() (err error) {
	w.buf.Flush()
	pending := w.buf.Bytes()

	var written int
	written, err = w.w.Write(pending)
	if written > 0 {
		// discard only what the writer actually took; the rest stays buffered
		w.buf.Skip(written)
		w.buf.Release()
	}
	return err
}

// MallocAck implements Writer by delegating to the internal buffer.
func (w *zcWriter) MallocAck(n int) (err error) {
	err = w.buf.MallocAck(n)
	return err
}

// Append implements Writer by delegating to the internal buffer.
func (w *zcWriter) Append(w2 Writer) (err error) {
	err = w.buf.Append(w2)
	return err
}

// WriteString implements Writer by delegating to the internal buffer.
func (w *zcWriter) WriteString(s string) (n int, err error) {
	n, err = w.buf.WriteString(s)
	return n, err
}

// WriteBinary implements Writer by delegating to the internal buffer.
func (w *zcWriter) WriteBinary(b []byte) (n int, err error) {
	n, err = w.buf.WriteBinary(b)
	return n, err
}

// WriteDirect implements Writer by delegating to the internal buffer.
func (w *zcWriter) WriteDirect(p []byte, remainCap int) error {
	err := w.buf.WriteDirect(p, remainCap)
	return err
}

// WriteByte implements Writer by delegating to the internal buffer.
func (w *zcWriter) WriteByte(b byte) (err error) {
	err = w.buf.WriteByte(b)
	return err
}

// zcReadWriter implements ReadWriter by embedding a zcReader for the read
// side and a zcWriter for the write side.
type zcReadWriter struct {
	*zcReader
	*zcWriter
}

// newIOReader wraps r so it can be used where an io.Reader is expected.
func newIOReader(r Reader) *ioReader {
	wrapped := new(ioReader)
	wrapped.r = r
	return wrapped
}

// compile-time check that *ioReader satisfies io.Reader.
var _ io.Reader = &ioReader{}

// ioReader implements io.Reader on top of a Reader.
//
// Deprecated: connection already implements Read directly with optimized buffer access.
// This wrapper exists only for external Reader implementations.
type ioReader struct {
	r Reader // wrapped Reader that Read consumes from
}

// Read implements io.Reader.
//
// It copies min(len(p), r.r.Len()) bytes into p and then releases the
// underlying Reader. It returns (0, nil) for an empty p and (0, io.EOF) when
// the Reader currently holds no data.
//
// If Release fails, the bytes have already been consumed from the Reader and
// copied into p, so they are reported together with the error rather than
// dropped. The io.Reader contract asks callers to process n > 0 bytes before
// looking at the error.
//
// BUG: Read calls Release which invalidates any slices previously returned by Next or Peek
// on the same Reader. Do not mix Next/Peek and Read on the same Reader without first
// calling Release.
func (r *ioReader) Read(p []byte) (n int, err error) {
	l := len(p)
	if l == 0 {
		return 0, nil
	}
	// read min(len(p), buffer.Len)
	if has := r.r.Len(); has < l {
		l = has
	}
	if l == 0 {
		return 0, io.EOF
	}
	src, err := r.r.Next(l)
	if err != nil {
		return 0, err
	}
	n = copy(p, src)
	return n, r.r.Release()
}

// newIOWriter wraps w so it can be used where an io.Writer is expected.
func newIOWriter(w Writer) *ioWriter {
	wrapped := new(ioWriter)
	wrapped.w = w
	return wrapped
}

// compile-time check that *ioWriter satisfies io.Writer.
var _ io.Writer = &ioWriter{}

// ioWriter implements io.Writer on top of a Writer.
type ioWriter struct {
	w Writer // wrapped Writer that Write mallocs from and flushes
}

// Write implements io.Writer.
// It reserves len(p) bytes on the wrapped Writer, copies p into them and
// flushes. If either Malloc or Flush fails, it returns 0 and that error.
func (w *ioWriter) Write(p []byte) (n int, err error) {
	var dst []byte
	if dst, err = w.w.Malloc(len(p)); err != nil {
		return 0, err
	}
	copied := copy(dst, p)
	if err = w.w.Flush(); err != nil {
		return 0, err
	}
	return copied, nil
}

// ioReadWriter implements io.ReadWriter by embedding an io.Reader and an
// io.Writer; its Read and Write methods are promoted from the embedded values.
type ioReadWriter struct {
	io.Reader
	io.Writer
}


================================================
FILE: nocopy_readwriter_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"errors"
	"io"
	"io/ioutil"
	"testing"
)

// TestZCReader drives Next, Peek, Skip and Release on a zcReader whose source
// fills every buffer it is handed, checking the buffered length after each
// step.
func TestZCReader(t *testing.T) {
	source := &MockIOReadWriter{
		read: func(p []byte) (n int, err error) {
			return len(p), nil
		},
	}
	zr := newZCReader(source)

	// Next consumes everything that had to be buffered for it
	data, err := zr.Next(block8k)
	MustNil(t, err)
	Equal(t, len(data), block8k)
	Equal(t, zr.buf.Len(), 0)

	// Peek leaves the peeked bytes in the buffer
	data, err = zr.Peek(block4k)
	MustNil(t, err)
	Equal(t, len(data), block4k)
	Equal(t, zr.buf.Len(), block4k)

	// Skip drops them again
	err = zr.Skip(block4k)
	MustNil(t, err)
	Equal(t, zr.buf.Len(), 0)

	err = zr.Release()
	MustNil(t, err)
}

// TestZCWriter checks that zcWriter.Flush drains the internal buffer into a
// sink that accepts everything, both when the data was only malloc'ed and
// when it was already flushed into the buffer beforehand.
func TestZCWriter(t *testing.T) {
	sink := &MockIOReadWriter{
		write: func(p []byte) (n int, err error) {
			return len(p), nil
		},
	}
	zw := newZCWriter(sink)

	// malloc'ed bytes are not readable until flushed
	chunk, err := zw.Malloc(block1k)
	MustNil(t, err)
	Equal(t, len(chunk), block1k)
	Equal(t, zw.buf.Len(), 0)

	// the writer-level Flush pushes them to the sink and empties the buffer
	err = zw.Flush()
	MustNil(t, err)
	Equal(t, zw.buf.Len(), 0)

	chunk, err = zw.Malloc(block2k)
	MustNil(t, err)
	Equal(t, len(chunk), block2k)
	Equal(t, zw.buf.Len(), 0)

	// flushing only the internal buffer makes the bytes readable in place
	err = zw.buf.Flush()
	MustNil(t, err)
	Equal(t, zw.buf.Len(), block2k)

	// a following writer-level Flush still drains them
	err = zw.Flush()
	MustNil(t, err)
	Equal(t, zw.buf.Len(), 0)
}

// TestZCEOF checks that an io.EOF from the underlying reader surfaces from
// zcReader as an error matching ErrEOF.
func TestZCEOF(t *testing.T) {
	// source never yields data and always reports end of stream
	source := &MockIOReadWriter{
		read: func(p []byte) (n int, err error) {
			return 0, io.EOF
		},
	}
	zr := newZCReader(source)

	_, err := zr.Next(block8k)
	isEOF := errors.Is(err, ErrEOF)
	MustTrue(t, isEOF)
}

// MockIOReadWriter is a test double providing Read and Write methods whose
// behaviour is supplied through optional hooks.
type MockIOReadWriter struct {
	// read, when set, is called by Read.
	read func(p []byte) (n int, err error)
	// write, when set, is called by Write.
	write func(p []byte) (n int, err error)
}

// Read forwards to the read hook. Without a hook it reports zero bytes and a
// nil error.
func (rw *MockIOReadWriter) Read(p []byte) (n int, err error) {
	if rw.read == nil {
		return 0, nil
	}
	return rw.read(p)
}

// Write forwards to the write hook. Without a hook it reports zero bytes and
// a nil error.
func (rw *MockIOReadWriter) Write(p []byte) (n int, err error) {
	if rw.write == nil {
		return 0, nil
	}
	return rw.write(p)
}

// TestIOReadWriter writes a message through ioWriter and reads it back
// through ioReader, both wrapping the same LinkBuffer.
func TestIOReadWriter(t *testing.T) {
	shared := NewLinkBuffer(block1k)
	r := newIOReader(shared)
	w := newIOWriter(shared)

	payload := []byte("hello world")
	written, err := w.Write(payload)
	MustNil(t, err)
	Equal(t, written, len(payload))

	// the destination is larger than the message; Read returns what is there
	dst := make([]byte, block1k)
	got, err := r.Read(dst)
	MustNil(t, err)
	Equal(t, got, len(payload))
}

// TestIOReadWriter2 writes a message through ioWriter and drains the matching
// ioReader with ioutil.ReadAll, which relies on Read eventually reporting
// io.EOF.
func TestIOReadWriter2(t *testing.T) {
	shared := NewLinkBuffer(block1k)
	r := newIOReader(shared)
	w := newIOWriter(shared)

	payload := []byte("hello world")
	written, err := w.Write(payload)
	MustNil(t, err)
	Equal(t, written, len(payload))

	all, err := ioutil.ReadAll(r)
	MustNil(t, err)
	Equal(t, len(all), len(payload))
}


================================================
FILE: poll.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

// Poll monitors file descriptors, calls the FDOperator to perform specific actions,
// and shields the underlying platform differences. On linux systems, poll uses epoll
// by default, and kevent by default on bsd systems.
type Poll interface {
	// Wait polls all registered fds and schedules processing based on the triggered events.
	// The call blocks, so the usage can be like:
	//
	//  go wait()
	//
	Wait() error

	// Close closes the poll and shuts down Wait().
	Close() error

	// Trigger can be used to actively refresh the loop where Wait is located when no event is triggered.
	// On linux systems, eventfd is used by default, and kevent by default on bsd systems.
	Trigger() error

	// Control changes how the operator's file descriptor is monitored;
	// the available operations are defined by PollEvent.
	Control(operator *FDOperator, event PollEvent) error

	// Alloc returns an operator taken from the poll's cache.
	Alloc() (operator *FDOperator)

	// Free gives the operator back to the poll's cache.
	Free(operator *FDOperator)
}

// PollEvent defines the operation requested from Poll.Control.
type PollEvent int

// Operations accepted by Poll.Control.
const (
	// PollReadable is used to monitor whether the FDOperator registered by
	// listener and connection is readable or closed.
	PollReadable PollEvent = 0x1

	// PollWritable is used to monitor whether the FDOperator created by the dialer is writable or closed.
	// ET mode must be used (still need to poll hup after being writable)
	PollWritable PollEvent = 0x2

	// PollDetach is used to remove the FDOperator from poll.
	PollDetach PollEvent = 0x3

	// PollR2RW is used to additionally monitor writable for an FDOperator,
	// which is only called when the socket write buffer is full.
	PollR2RW PollEvent = 0x5

	// PollRW2R is used to remove the writable monitor of FDOperator, generally used with PollR2RW.
	PollRW2R PollEvent = 0x6
)


================================================
FILE: poll_default.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

// Alloc implements Poll.
// It takes an operator from the poll's operator cache and binds it to p.
func (p *defaultPoll) Alloc() (operator *FDOperator) {
	operator = p.opcache.alloc()
	operator.poll = p
	return operator
}

// Free implements Poll.
// It hands the operator to the poll's operator cache via freeable.
func (p *defaultPoll) Free(operator *FDOperator) {
	p.opcache.freeable(operator)
}

// appendHup queues the operator's OnHup callback for the next onhups run,
// detaches the operator from the poll and marks it done.
func (p *defaultPoll) appendHup(operator *FDOperator) {
	hup := operator.OnHup
	p.hups = append(p.hups, hup)
	p.detach(operator)
	operator.done()
}

// detach issues PollDetach for the operator. A failure is logged and not
// reported to the caller.
func (p *defaultPoll) detach(operator *FDOperator) {
	err := operator.Control(PollDetach)
	if err == nil {
		return
	}
	logger.Printf("NETPOLL: poller detach operator failed: %v", err)
}

// onhups runs all queued hup callbacks on a separate goroutine and clears the
// queue. Nil callbacks are skipped and callback errors are ignored.
func (p *defaultPoll) onhups() {
	pending := p.hups
	if len(pending) == 0 {
		return
	}
	// detach the queue from the poll so new hups start a fresh slice
	p.hups = nil
	go func(callbacks []func(p Poll) error) {
		for _, cb := range callbacks {
			if cb == nil {
				continue
			}
			cb(p)
		}
	}(pending)
}

// readall reads all data left on the operator's fd before the connection is
// closed.
//
// It repeatedly asks the operator for input buffers, reads into them with
// ioread and acknowledges the bytes read. It returns the total number of bytes
// read, with a nil error when the operator provides no more buffers, or with
// the first error reported by ioread.
func readall(op *FDOperator, br barrier) (total int, err error) {
	ivs := br.ivs
	var n int
	for {
		bs := op.Inputs(br.bs)
		if len(bs) == 0 {
			return total, nil
		}

	TryRead:
		n, err = ioread(op.FD, bs, ivs)
		op.InputAck(n)
		total += n
		if err != nil {
			return total, err
		}
		// nothing read and no error: retry with the same buffers instead of
		// requesting new ones
		if n == 0 {
			goto TryRead
		}
	}
}


================================================
FILE: poll_default_bsd.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly
// +build darwin netbsd freebsd openbsd dragonfly

package netpoll

import (
	"errors"
	"sync"
	"sync/atomic"
	"syscall"
	"unsafe"
)

// openPoll opens the default kqueue-based poll.
//
// On failure it returns a literal nil Poll. Returning the *defaultPoll result
// directly would wrap a nil pointer in a non-nil interface value, so a caller
// comparing the Poll against nil would get the wrong answer.
func openPoll() (Poll, error) {
	poll, err := openDefaultPoll()
	if err != nil {
		return nil, err
	}
	return poll, nil
}

// openDefaultPoll creates a kqueue, registers the user event (ident 0) that
// Trigger uses to wake Wait, and attaches a new operator cache. The kqueue is
// closed again if registering the user event fails.
func openDefaultPoll() (*defaultPoll, error) {
	kq, err := syscall.Kqueue()
	if err != nil {
		return nil, err
	}
	poll := &defaultPoll{fd: kq}

	wakeup := syscall.Kevent_t{
		Ident:  0,
		Filter: syscall.EVFILT_USER,
		Flags:  syscall.EV_ADD | syscall.EV_CLEAR,
	}
	if _, err = syscall.Kevent(poll.fd, []syscall.Kevent_t{wakeup}, nil, nil); err != nil {
		syscall.Close(poll.fd)
		return nil, err
	}

	poll.opcache = newOperatorCache()
	return poll, nil
}

// defaultPoll is the kqueue-based Poll implementation for BSD-family systems.
type defaultPoll struct {
	// fd is the kqueue descriptor returned by syscall.Kqueue.
	fd      int
	// trigger counts Trigger calls since Wait last saw the user event; only
	// the call that raises it from 0 to 1 actually posts the event.
	trigger uint32
	m       sync.Map       //nolint:unused // only used in go:race
	opcache *operatorCache // operator cache
	// hups holds OnHup callbacks queued by appendHup until onhups runs them.
	hups    []func(p Poll) error
}

// Wait implements Poll.
//
// It loops forever on kevent, handling up to 1024 events per round. For every
// event it resolves the FDOperator, performs the read, hup and write handling
// the event asks for, and finally runs the queued hup callbacks and frees
// cached operators. It returns nil when kevent fails with EBADF (treated as a
// graceful exit) and returns any other error except EINTR as is.
func (p *defaultPoll) Wait() error {
	// init
	size, caps := 1024, barriercap
	events, barriers := make([]syscall.Kevent_t, size), make([]barrier, size)
	for i := range barriers {
		barriers[i].bs = make([][]byte, caps)
		barriers[i].ivs = make([]syscall.Iovec, caps)
	}
	// wait
	var triggerRead, triggerWrite, triggerHup bool
	for {
		n, err := syscall.Kevent(p.fd, nil, events, nil)
		if err != nil && err != syscall.EINTR {
			// exit gracefully
			if err == syscall.EBADF {
				return nil
			}
			return err
		}
		for i := 0; i < n; i++ {
			fd := int(events[i].Ident)
			// trigger
			if fd == 0 {
				// clean trigger
				atomic.StoreUint32(&p.trigger, 0)
				continue
			}
			operator := p.getOperator(fd, unsafe.Pointer(&events[i].Udata))
			// skip operators that are gone or cannot be claimed right now
			if operator == nil || !operator.do() {
				continue
			}

			// totalRead counts the bytes read for this event; a hup only
			// closes the connection when nothing was read
			var totalRead int
			evt := events[i]
			triggerRead = evt.Filter == syscall.EVFILT_READ && evt.Flags&syscall.EV_ENABLE != 0
			triggerWrite = evt.Filter == syscall.EVFILT_WRITE && evt.Flags&syscall.EV_ENABLE != 0
			triggerHup = evt.Flags&syscall.EV_EOF != 0

			if triggerRead {
				if operator.OnRead != nil {
					// for non-connection
					operator.OnRead(p)
				} else {
					// only for connection
					bs := operator.Inputs(barriers[i].bs)
					if len(bs) > 0 {
						n, err := ioread(operator.FD, bs, barriers[i].ivs)
						operator.InputAck(n)
						totalRead += n
						if err != nil {
							p.appendHup(operator)
							continue
						}
					}
				}
			}
			if triggerHup {
				if triggerRead && operator.Inputs != nil {
					var leftRead int
					// read all left data if peer send and close
					if leftRead, err = readall(operator, barriers[i]); err != nil && !errors.Is(err, ErrEOF) {
						// log the number of bytes readall managed to read
						logger.Printf("NETPOLL: readall(fd=%d)=%d before close: %s", operator.FD, leftRead, err.Error())
					}
					totalRead += leftRead
				}
				// only close connection if no further read bytes
				if totalRead == 0 {
					p.appendHup(operator)
					continue
				}
			}
			if triggerWrite {
				if operator.OnWrite != nil {
					// for non-connection
					operator.OnWrite(p)
				} else {
					// only for connection
					bs, supportZeroCopy := operator.Outputs(barriers[i].bs)
					if len(bs) > 0 {
						// TODO: Let the upper layer pass in whether to use ZeroCopy.
						n, err := iosend(operator.FD, bs, barriers[i].ivs, false && supportZeroCopy)
						operator.OutputAck(n)
						if err != nil {
							p.appendHup(operator)
							continue
						}
					}
				}
			}
			operator.done()
		}
		// hup conns together to avoid blocking the poll.
		p.onhups()
		p.opcache.free()
	}
}

// Close implements Poll by closing the kqueue descriptor.
//
// TODO: Close will bad file descriptor here
func (p *defaultPoll) Close() error {
	return syscall.Close(p.fd)
}

// Trigger implements Poll.
// It posts the user event registered on ident 0. Calls made while the trigger
// counter is already non-zero return nil without another syscall.
func (p *defaultPoll) Trigger() error {
	if pending := atomic.AddUint32(&p.trigger, 1); pending > 1 {
		return nil
	}
	wake := []syscall.Kevent_t{{
		Ident:  0,
		Filter: syscall.EVFILT_USER,
		Fflags: syscall.NOTE_TRIGGER,
	}}
	_, err := syscall.Kevent(p.fd, wake, nil, nil)
	return err
}

// Control implements Poll.
//
// It builds a single kevent change for operator.FD according to event and
// submits it with syscall.Kevent, returning that call's error. An event value
// not listed in the switch leaves the filter and flags at their zero values.
func (p *defaultPoll) Control(operator *FDOperator, event PollEvent) error {
	evs := make([]syscall.Kevent_t, 1)
	evs[0].Ident = uint64(operator.FD)
	// record the operator for this change so Wait can find it again through
	// getOperator (presumably via the Udata slot; see setOperator)
	p.setOperator(unsafe.Pointer(&evs[0].Udata), operator)
	switch event {
	case PollReadable:
		operator.inuse()
		evs[0].Filter, evs[0].Flags = syscall.EVFILT_READ, syscall.EV_ADD|syscall.EV_ENABLE
	case PollWritable:
		operator.inuse()
		evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_ADD|syscall.EV_ENABLE
	case PollDetach:
		// remove whichever filter the operator was registered with
		if operator.OnWrite != nil { // means WaitWrite finished
			evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_DELETE
		} else {
			evs[0].Filter, evs[0].Flags = syscall.EVFILT_READ, syscall.EV_DELETE
		}
		p.delOperator(operator)
	case PollR2RW:
		// add the write filter; nothing here touches the read filter
		evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_ADD|syscall.EV_ENABLE
	case PollRW2R:
		// drop the write filter again
		evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_DELETE
	}
	_, err := syscall.Kevent(p.fd, evs, nil, nil)
	return err
}


================================================
FILE: poll_default_bsd_norace.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build (darwin || netbsd || freebsd || openbsd || dragonfly) && !race
// +build darwin netbsd freebsd openbsd dragonfly
// +build !race

package netpoll

import "unsafe"

// getOperator returns the *FDOperator stored in the memory ptr points at.
// The fd argument is not used in this (non-race) build.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	slot := (**FDOperator)(ptr)
	return *slot
}

// setOperator writes the operator pointer into the memory ptr points at, so a
// later getOperator on the same location yields it back.
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	slot := (**FDOperator)(ptr)
	*slot = operator
}

// delOperator is a no-op in this (non-race) build: setOperator keeps no
// bookkeeping besides the pointer written into the event itself.
func (p *defaultPoll) delOperator(operator *FDOperator) {
}


================================================
FILE: poll_default_bsd_race.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build (darwin || netbsd || freebsd || openbsd || dragonfly) && race
// +build darwin netbsd freebsd openbsd dragonfly
// +build race

package netpoll

import "unsafe"

// getOperator looks the operator up in p.m by fd and returns nil when no
// value is stored under that key. ptr is not used in this (race) build.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	if stored, _ := p.m.Load(fd); stored != nil {
		return stored.(*FDOperator)
	}
	return nil
}

// setOperator records operator in p.m keyed by its FD. ptr is not written in
// this (race) build.
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	key := operator.FD
	p.m.Store(key, operator)
}

// delOperator removes the entry stored in p.m under operator.FD.
func (p *defaultPoll) delOperator(operator *FDOperator) {
	key := operator.FD
	p.m.Delete(key)
}


================================================
FILE: poll_default_linux.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"errors"
	"runtime"
	"sync"
	"sync/atomic"
	"syscall"
	"unsafe"
)

// openPoll opens the default poller and returns it as a Poll.
//
// On failure it returns a literal nil Poll together with the error. Returning
// openDefaultPoll's results directly would wrap its nil *defaultPoll in a
// non-nil interface value, so a caller comparing the Poll against nil would
// be misled.
func openPoll() (Poll, error) {
	poll, err := openDefaultPoll()
	if err != nil {
		return nil, err
	}
	return poll, nil
}

// openDefaultPoll creates an epoll instance and an eventfd, wires the
// poller's Reset/Handler hooks to its own reset/handler methods, registers the
// eventfd for PollReadable, and finally attaches a fresh operator cache.
// Descriptors opened so far are closed again if a later step fails.
func openDefaultPoll() (*defaultPoll, error) {
	epfd, err := EpollCreate(0)
	if err != nil {
		return nil, err
	}

	evfd, _, errno := syscall.Syscall(syscall.SYS_EVENTFD2, 0, 0, 0)
	if errno != 0 {
		_ = syscall.Close(epfd)
		return nil, errno
	}

	poll := &defaultPoll{
		fd:  epfd,
		buf: make([]byte, 8),
		wop: &FDOperator{FD: int(evfd)},
	}
	poll.Reset = poll.reset
	poll.Handler = poll.handler

	// register the eventfd so writes to it wake up epoll_wait
	if err = poll.Control(poll.wop, PollReadable); err != nil {
		_ = syscall.Close(poll.wop.FD)
		_ = syscall.Close(poll.fd)
		return nil, err
	}

	poll.opcache = newOperatorCache()
	return poll, nil
}

// defaultPoll is the epoll-backed Poll implementation in this file.
// It embeds pollArgs (event and barrier buffers plus the pending hup list)
// and exposes Reset and Handler as function fields; openDefaultPoll sets
// them to the reset and handler methods.
type defaultPoll struct {
	pollArgs
	fd      int            // epoll fd
	wop     *FDOperator    // eventfd, wake epoll_wait
	buf     []byte         // read wfd trigger msg
	trigger uint32         // trigger flag
	m       sync.Map       //nolint:unused // only used in go:race
	opcache *operatorCache // operator cache
	// fns for handle events
	Reset   func(size, caps int)
	Handler func(events []epollevent) (closed bool)
}

// pollArgs bundles the per-poller scratch state used while waiting for and
// handling events.
type pollArgs struct {
	size     int                  // length of events and barriers, set by reset
	caps     int                  // length of each barrier's bs and ivs slices, set by reset
	events   []epollevent         // buffer handed to EpollWait
	barriers []barrier            // one barrier per event slot, indexed like events
	hups     []func(p Poll) error // hup callbacks; not touched by reset
}

// reset records size and caps and reallocates the event and barrier buffers:
// size events, size barriers, and caps entries in every barrier's bs and ivs.
// The hups slice is left as it is.
func (a *pollArgs) reset(size, caps int) {
	a.size = size
	a.caps = caps
	a.events = make([]epollevent, size)
	a.barriers = make([]barrier, size)
	for i := 0; i < len(a.barriers); i++ {
		b := &a.barriers[i]
		b.bs = make([][]byte, a.caps)
		b.ivs = make([]syscall.Iovec, a.caps)
	}
}

// Wait implements Poll.
// It loops on EpollWait and passes every non-empty batch to p.Handler.
// The buffers start at 128 events and are doubled whenever the previous wait
// filled them completely, as long as the current size is below 128*1024.
// It returns nil once Handler reports that the poller was closed, or the
// first EpollWait error other than EINTR.
func (p *defaultPoll) Wait() (err error) {
	var (
		caps = barriercap
		msec = -1
		n    = 0
	)
	p.Reset(128, caps)
	for {
		// the last wait used every slot: grow the buffers
		if n == p.size && p.size < 128*1024 {
			p.Reset(p.size<<1, caps)
		}
		n, err = EpollWait(p.fd, p.events, msec)
		if err != nil && err != syscall.EINTR {
			return err
		}
		if n > 0 {
			// events arrived: the next wait does not block
			msec = 0
			if p.Handler(p.events[:n]) {
				return nil
			}
			// we can make sure that there is no op remaining if Handler finished
			p.opcache.free()
			continue
		}
		// nothing ready: block on the next wait and yield first
		msec = -1
		runtime.Gosched()
	}
}

// handler processes one batch of epoll events.
//
// For each event it resolves the operator via getOperator and skips the event
// when there is none or when operator.do() returns false. The wake-up eventfd
// (p.wop) is handled specially: its counter is read into p.buf, the trigger
// flag is cleared, and a non-zero first byte means Close was requested, in
// which case both descriptors are closed and handler returns true.
//
// For all other operators the event bits are handled in this order: read,
// hup, error, write. Operators that should be closed are queued with
// appendHup, and the queue is run by p.onhups() after the whole batch.
// Paths that finish normally call operator.done(); paths that queue a hup
// skip it (presumably the hup path releases the operator, confirm against
// appendHup/onhups).
//
// It returns true only when the poller itself has been closed.
func (p *defaultPoll) handler(events []epollevent) (closed bool) {
	var triggerRead, triggerWrite, triggerHup, triggerError bool
	var err error
	for i := range events {
		operator := p.getOperator(0, unsafe.Pointer(&events[i].data))
		if operator == nil || !operator.do() {
			continue
		}

		// bytes read for this operator during this event
		var totalRead int
		evt := events[i].events
		triggerRead = evt&syscall.EPOLLIN != 0
		triggerWrite = evt&syscall.EPOLLOUT != 0
		triggerHup = evt&(syscall.EPOLLHUP|syscall.EPOLLRDHUP) != 0
		triggerError = evt&syscall.EPOLLERR != 0

		// trigger or exit gracefully
		if operator.FD == p.wop.FD {
			// must clean trigger first
			syscall.Read(p.wop.FD, p.buf)
			atomic.StoreUint32(&p.trigger, 0)
			// if closed & exit
			if p.buf[0] > 0 {
				syscall.Close(p.wop.FD)
				syscall.Close(p.fd)
				operator.done()
				return true
			}
			operator.done()
			continue
		}

		if triggerRead {
			if operator.OnRead != nil {
				// for non-connection
				operator.OnRead(p)
			} else if operator.Inputs != nil {
				// for connection
				bs := operator.Inputs(p.barriers[i].bs)
				if len(bs) > 0 {
					n, err := ioread(operator.FD, bs, p.barriers[i].ivs)
					operator.InputAck(n)
					totalRead += n
					if err != nil {
						p.appendHup(operator)
						continue
					}
				}
			} else {
				logger.Printf("NETPOLL: operator has critical problem! event=%d operator=%v", evt, operator)
			}
		}
		if triggerHup {
			if triggerRead && operator.Inputs != nil {
				// read all left data if peer send and close
				var leftRead int
				if leftRead, err = readall(operator, p.barriers[i]); err != nil && !errors.Is(err, ErrEOF) {
					logger.Printf("NETPOLL: readall(fd=%d)=%d before close: %s", operator.FD, totalRead, err.Error())
				}
				totalRead += leftRead
			}
			// only close connection if no further read bytes
			if totalRead == 0 {
				p.appendHup(operator)
				continue
			}
		}
		if triggerError {
			// Under block-zerocopy, the kernel may give an error callback, which is not a real error, just an EAGAIN.
			// So here we need to check this error, if it is EAGAIN then do nothing, otherwise still mark as hup.
			if _, _, _, _, err := syscall.Recvmsg(operator.FD, nil, nil, syscall.MSG_ERRQUEUE); err != syscall.EAGAIN {
				p.appendHup(operator)
			} else {
				operator.done()
			}
			continue
		}
		if triggerWrite {
			if operator.OnWrite != nil {
				// for non-connection
				operator.OnWrite(p)
			} else if operator.Outputs != nil {
				// for connection
				bs, _ := operator.Outputs(p.barriers[i].bs)
				if len(bs) > 0 {
					n, err := iosend(operator.FD, bs, p.barriers[i].ivs, false)
					operator.OutputAck(n)
					if err != nil {
						p.appendHup(operator)
						continue
					}
				}
			} else {
				logger.Printf("NETPOLL: operator has critical problem! event=%d operator=%v", evt, operator)
			}
		}
		operator.done()
	}
	// hup conns together to avoid blocking the poll.
	p.onhups()
	return false
}

// Close will write 10000000
// to the wake-up eventfd. The handler treats a non-zero first byte read back
// from that eventfd as the request to shut the poller down.
func (p *defaultPoll) Close() error {
	closeMsg := []byte{1, 0, 0, 0, 0, 0, 0, 0}
	_, err := syscall.Write(p.wop.FD, closeMsg)
	return err
}

// Trigger implements Poll.
// It increments the trigger counter; when the incremented value is greater
// than 1 it returns nil without writing. Otherwise it writes an 8-byte value
// whose first byte is zero to the wake-up eventfd, which distinguishes it
// from the message written by Close.
func (p *defaultPoll) Trigger() error {
	if pending := atomic.AddUint32(&p.trigger, 1); pending > 1 {
		return nil
	}
	// MAX(eventfd) = 0xfffffffffffffffe
	wakeMsg := []byte{0, 0, 0, 0, 0, 0, 0, 1}
	_, err := syscall.Write(p.wop.FD, wakeMsg)
	return err
}

// Control implements Poll.
// It translates event into an epoll_ctl operation and interest mask for
// operator.FD and applies it to p.fd. The operator is attached to the event's
// data field through setOperator before the switch runs. An event value that
// matches no case leaves the operation and mask at zero.
func (p *defaultPoll) Control(operator *FDOperator, event PollEvent) error {
	// DON'T move `fd=operator.FD` behind inuse() call, we can only access operator before op.inuse() for avoid race
	// G1:              G2:
	// op.inuse()       op.unused()
	// op.FD  -- T1     op.FD = 0  -- T2
	// T1 and T2 may happen together
	fd := operator.FD
	var (
		ctl int
		ev  epollevent
	)
	p.setOperator(unsafe.Pointer(&ev.data), operator)
	switch event {
	case PollReadable: // server accept a new connection and wait read
		operator.inuse()
		ctl = syscall.EPOLL_CTL_ADD
		ev.events = syscall.EPOLLIN | syscall.EPOLLRDHUP | syscall.EPOLLERR
	case PollWritable: // client create a new connection and wait connect finished
		operator.inuse()
		ctl = syscall.EPOLL_CTL_ADD
		ev.events = EPOLLET | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR
	case PollDetach: // deregister
		p.delOperator(operator)
		ctl = syscall.EPOLL_CTL_DEL
		ev.events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR
	case PollR2RW: // connection wait read/write
		ctl = syscall.EPOLL_CTL_MOD
		ev.events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR
	case PollRW2R: // connection wait read
		ctl = syscall.EPOLL_CTL_MOD
		ev.events = syscall.EPOLLIN | syscall.EPOLLRDHUP | syscall.EPOLLERR
	}
	return EpollCtl(p.fd, ctl, fd, &ev)
}


================================================
FILE: poll_default_linux_norace.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux && !race
// +build linux,!race

package netpoll

import "unsafe"

// getOperator returns the *FDOperator stored in the memory ptr points at.
// The fd argument is not used in this (non-race) build.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	slot := (**FDOperator)(ptr)
	return *slot
}

// setOperator writes the operator pointer into the memory ptr points at, so a
// later getOperator on the same location yields it back.
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	slot := (**FDOperator)(ptr)
	*slot = operator
}

// delOperator is a no-op in this (non-race) build: setOperator keeps no
// bookkeeping besides the pointer written into the event itself.
func (p *defaultPoll) delOperator(operator *FDOperator) {
}


================================================
FILE: poll_default_linux_race.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux && race
// +build linux,race

package netpoll

import "unsafe"

// eventdata is the 8-byte layout this (race) build writes into an event's
// data field: the file descriptor followed by padding.
type eventdata struct {
	fd  int32 // descriptor used as the key into defaultPoll.m
	pad int32 // unused; setOperator leaves it zero
}

// getOperator decodes the descriptor stored at ptr as eventdata and looks the
// operator up in p.m under that descriptor. It returns nil when nothing is
// stored for it. The fd argument is not used.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	key := int((*eventdata)(ptr).fd)
	if stored, _ := p.m.Load(key); stored != nil {
		return stored.(*FDOperator)
	}
	return nil
}

// setOperator writes operator.FD (as eventdata) into the memory ptr points at
// and records the operator in p.m under the same descriptor.
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	slot := (*eventdata)(ptr)
	*slot = eventdata{fd: int32(operator.FD)}
	p.m.Store(operator.FD, operator)
}

// delOperator removes the entry stored in p.m under operator.FD.
func (p *defaultPoll) delOperator(operator *FDOperator) {
	key := operator.FD
	p.m.Delete(key)
}


================================================
FILE: poll_default_linux_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux
// +build linux

package netpoll

import (
	"context"
	"errors"
	"syscall"
	"testing"

	"golang.org/x/sys/unix"
)

// TestEpollEvent checks which user data epoll reports for a descriptor after
// it has been re-registered (ADD, DEL, ADD) or modified (ADD, MOD, MOD): the
// data attached by the most recent successful operation is expected.
func TestEpollEvent(t *testing.T) {
	epollfd, err := EpollCreate(0)
	MustNil(t, err)
	defer syscall.Close(epollfd)

	rfd, wfd := GetSysFdPairs()
	defer syscall.Close(rfd)
	defer syscall.Close(wfd)

	send := []byte("hello")
	recv := make([]byte, 5)
	events := make([]epollevent, 128)
	// three distinguishable payloads for the event data field
	eventdata1 := [8]byte{0, 0, 0, 0, 0, 0, 0, 1}
	eventdata2 := [8]byte{0, 0, 0, 0, 0, 0, 0, 2}
	eventdata3 := [8]byte{0, 0, 0, 0, 0, 0, 0, 3}
	event1 := &epollevent{
		events: syscall.EPOLLIN,
		data:   eventdata1,
	}
	event2 := &epollevent{
		events: syscall.EPOLLIN,
		data:   eventdata2,
	}
	event3 := &epollevent{
		events: syscall.EPOLLIN | syscall.EPOLLOUT,
		data:   eventdata3,
	}

	// EPOLL: add ,del and add
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event1)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event1)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event2)
	MustNil(t, err)
	_, err = syscall.Write(wfd, send)
	MustNil(t, err)
	n, err := epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Equal(t, n, 1)
	// data from the second ADD is reported
	Equal(t, events[0].data, eventdata2)
	_, err = syscall.Read(rfd, recv)
	MustTrue(t, err == nil && string(recv) == string(send))
	err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event2)
	MustNil(t, err)

	// EPOLL: add ,mod and mod
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event1)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_MOD, rfd, event2)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_MOD, rfd, event3)
	MustNil(t, err)
	_, err = syscall.Write(wfd, send)
	MustNil(t, err)
	n, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Equal(t, n, 1)
	// data and interest mask from the last MOD are reported
	Equal(t, events[0].data, eventdata3)
	_, err = syscall.Read(rfd, recv)
	MustTrue(t, err == nil && string(recv) == string(send))
	Assert(t, events[0].events&syscall.EPOLLIN != 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)

	err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event2)
	MustNil(t, err)
}

// TestEpollWait walks a level-triggered registration through its states:
// writable only, readable after the peer writes, writable only again after
// the data is consumed, and IN|OUT|RDHUP after the peer closes. It then
// registers a second pair and closes the registered end itself.
//
// rfd is closed on exit. wfd is closed explicitly in the middle of the test,
// so the deferred cleanup closes it only if the test bails out earlier;
// closing it a second time could hit an unrelated descriptor that reused the
// number (rfd2/wfd2 are opened after it is released).
func TestEpollWait(t *testing.T) {
	epollfd, err := EpollCreate(0)
	MustNil(t, err)
	defer syscall.Close(epollfd)

	rfd, wfd := GetSysFdPairs()
	defer syscall.Close(rfd)
	wfdClosed := false
	defer func() {
		if !wfdClosed {
			syscall.Close(wfd)
		}
	}()

	send := []byte("hello")
	recv := make([]byte, 5)
	events := make([]epollevent, 128)
	eventdata := [8]byte{0, 0, 0, 0, 0, 0, 0, 1}

	// EPOLL: init state
	event := &epollevent{
		events: syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR,
		data:   eventdata,
	}
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event)
	MustNil(t, err)
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLIN == 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)

	// EPOLL: readable
	_, err = syscall.Write(wfd, send)
	MustNil(t, err)
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLIN != 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	_, err = syscall.Read(rfd, recv)
	MustTrue(t, err == nil && string(recv) == string(send))

	// EPOLL: read finished
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLIN == 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)

	// EPOLL: close peer fd
	wfdClosed = true // the number is released (or invalid) after this call either way
	err = syscall.Close(wfd)
	MustNil(t, err)
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLIN != 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP != 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)

	// EPOLL: close current fd
	rfd2, wfd2 := GetSysFdPairs()
	defer syscall.Close(wfd2)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd2, event)
	MustNil(t, err)
	err = syscall.Close(rfd2)
	MustNil(t, err)
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLIN != 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP != 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)

	err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event)
	MustNil(t, err)
}

// TestEpollETClose checks edge-triggered behaviour around close: closing the
// registered descriptor itself produces no event, while closing the peer
// produces IN|OUT|RDHUP and a subsequent read reports EOF.
// The second rfd is closed at the end so the test does not leak it.
func TestEpollETClose(t *testing.T) {
	epollfd, err := EpollCreate(0)
	MustNil(t, err)
	defer syscall.Close(epollfd)
	rfd, wfd := GetSysFdPairs()
	events := make([]epollevent, 128)
	eventdata := [8]byte{0, 0, 0, 0, 0, 0, 0, 1}
	event := &epollevent{
		events: EPOLLET | syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR,
		data:   eventdata,
	}

	// EPOLL: init state
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event)
	MustNil(t, err)
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLIN == 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP == 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)

	// EPOLL: close current fd
	// nothing will happen
	err = syscall.Close(rfd)
	MustNil(t, err)
	n, err := epollWaitUntil(epollfd, events, 100)
	MustNil(t, err)
	Assert(t, n == 0, n)
	err = syscall.Close(wfd)
	MustNil(t, err)

	// EPOLL: close peer fd
	// EPOLLIN and EPOLLOUT
	rfd, wfd = GetSysFdPairs()
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event)
	MustNil(t, err)
	err = syscall.Close(wfd)
	MustNil(t, err)
	n, err = epollWaitUntil(epollfd, events, 100)
	MustNil(t, err)
	Assert(t, n == 1, n)
	Assert(t, events[0].events&syscall.EPOLLIN != 0)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP != 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)
	buf := make([]byte, 1024)
	ivs := make([]syscall.Iovec, 1)
	n, err = ioread(rfd, [][]byte{buf}, ivs) // EOF
	Assert(t, n == 0 && errors.Is(err, ErrEOF), n, err)

	// release the remaining end of the second pair
	err = syscall.Close(rfd)
	MustNil(t, err)
}

// TestEpollETDel checks that EPOLL_CTL_DEL removes a registration even when
// the event mask passed to DEL differs from the one used for ADD: after the
// delete, a write by the peer must not surface any event for rfd.
// Both ends of the pair are closed on exit so the test does not leak them.
func TestEpollETDel(t *testing.T) {
	epollfd, err := EpollCreate(0)
	MustNil(t, err)
	defer syscall.Close(epollfd)
	rfd, wfd := GetSysFdPairs()
	defer syscall.Close(rfd)
	defer syscall.Close(wfd)
	send := []byte("hello")
	events := make([]epollevent, 128)
	eventdata := [8]byte{0, 0, 0, 0, 0, 0, 0, 1}
	event := &epollevent{
		events: EPOLLET | syscall.EPOLLIN | syscall.EPOLLRDHUP | syscall.EPOLLERR,
		data:   eventdata,
	}

	// EPOLL: del partly
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event)
	MustNil(t, err)
	event.events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR
	err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event)
	MustNil(t, err)
	_, err = syscall.Write(wfd, send)
	MustNil(t, err)
	_, err = epollWaitUntil(epollfd, events, 100)
	MustNil(t, err)
	// events was never filled, so every flag is still clear
	Assert(t, events[0].events&syscall.EPOLLIN == 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP == 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)
}

// TestEpollConnectSameFD connects three non-blocking sockets in a row to a
// local test server, registering each with the same epoll instance.
// The first socket is closed without EPOLL_CTL_DEL; the second socket is then
// registered with different event data and the test expects the reported data
// to be the second one. The third socket is switched to an EPOLLIN-only
// registration and closed, after which no event is expected.
// NOTE(review): the scenario name suggests fd2 reuses fd1's number; the test
// only logs the numbers and does not assert it.
func TestEpollConnectSameFD(t *testing.T) {
	addr := syscall.SockaddrInet4{
		Port: 12345,
		Addr: [4]byte{127, 0, 0, 1},
	}
	loop := newTestEventLoop("tcp", "127.0.0.1:12345",
		func(ctx context.Context, connection Connection) error {
			_, err := connection.Reader().Next(connection.Reader().Len())
			return err
		},
	)
	defer loop.Shutdown(context.Background())

	epollfd, err := EpollCreate(0)
	MustNil(t, err)
	defer syscall.Close(epollfd)
	events := make([]epollevent, 128)
	eventdata1 := [8]byte{0, 0, 0, 0, 0, 0, 0, 1}
	eventdata2 := [8]byte{0, 0, 0, 0, 0, 0, 0, 2}
	event1 := &epollevent{
		events: EPOLLET | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR,
		data:   eventdata1,
	}
	event2 := &epollevent{
		events: EPOLLET | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR,
		data:   eventdata2,
	}
	eventin := &epollevent{
		events: syscall.EPOLLIN | syscall.EPOLLRDHUP | syscall.EPOLLERR,
		data:   eventdata1,
	}

	// connect non-block socket
	fd1, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP)
	MustNil(t, err)
	t.Logf("create fd: %d", fd1)
	err = syscall.SetNonblock(fd1, true)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, fd1, event1)
	MustNil(t, err)
	err = syscall.Connect(fd1, &addr)
	t.Log(err) // EINPROGRESS
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP == 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)
	// forget to del fd
	// err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, fd1, event1)
	// MustNil(t, err)
	err = syscall.Close(fd1) // close fd1
	MustNil(t, err)

	// connect non-block socket with same fd
	fd2, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP)
	MustNil(t, err)
	t.Logf("create fd: %d", fd2)
	err = syscall.SetNonblock(fd2, true)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, fd2, event2)
	MustNil(t, err)
	err = syscall.Connect(fd2, &addr)
	t.Log(err) // EINPROGRESS
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP == 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, fd2, event2)
	MustNil(t, err)
	err = syscall.Close(fd2) // close fd2
	MustNil(t, err)
	// the event reported for the second socket carries the second payload
	Equal(t, events[0].data, eventdata2)

	// no event after close fd
	fd3, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP)
	MustNil(t, err)
	t.Logf("create fd: %d", fd3)
	err = syscall.SetNonblock(fd3, true)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, fd3, event1)
	MustNil(t, err)
	err = syscall.Connect(fd3, &addr)
	t.Log(err) // EINPROGRESS
	_, err = epollWaitUntil(epollfd, events, -1)
	MustNil(t, err)
	Assert(t, events[0].events&syscall.EPOLLOUT != 0)
	Assert(t, events[0].events&syscall.EPOLLRDHUP == 0)
	Assert(t, events[0].events&syscall.EPOLLERR == 0)
	MustNil(t, err)
	err = EpollCtl(epollfd, unix.EPOLL_CTL_MOD, fd3, eventin)
	MustNil(t, err)
	err = syscall.Close(fd3) // close fd3
	MustNil(t, err)
	n, err := epollWaitUntil(epollfd, events, 100)
	MustNil(t, err)
	Assert(t, n == 0)
}

// epollWaitUntil calls EpollWait and retries for as long as it is interrupted
// (EINTR). Any other outcome, success included, is returned unchanged.
func epollWaitUntil(epfd int, events []epollevent, msec int) (n int, err error) {
	for {
		n, err = EpollWait(epfd, events, msec)
		if err != syscall.EINTR {
			return n, err
		}
	}
}


================================================
FILE: poll_loadbalance.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"sync/atomic"

	"github.com/bytedance/gopkg/lang/fastrand"
)

// LoadBalance identifies a load balancing method used to choose a Poll.
// The defined values are RoundRobin and Random.
type LoadBalance int

// Supported LoadBalance values. RoundRobin is the zero value.
const (
	// RoundRobin requests that connections are distributed to a Poll
	// in a round-robin fashion.
	RoundRobin LoadBalance = iota
	// Random requests that connections are randomly distributed.
	Random
)

// loadbalance selects one Poll out of a set of polls according to a
// LoadBalance method.
type loadbalance interface {
	// LoadBalance reports which method this balancer implements.
	LoadBalance() LoadBalance
	// Pick choose the most qualified Poll
	Pick() (poll Poll)

	// Rebalance replaces the set of polls the balancer picks from.
	Rebalance(polls []Poll)
}

// newLoadbalance builds the balancer for lb over polls. Random yields a
// random balancer; RoundRobin and any unrecognised value yield a round-robin
// balancer.
func newLoadbalance(lb LoadBalance, polls []Poll) loadbalance {
	if lb == Random {
		return newRandomLB(polls)
	}
	return newRoundRobinLB(polls)
}

// newRandomLB returns a random balancer over polls, caching len(polls) as its
// poll count.
func newRandomLB(polls []Poll) loadbalance {
	lb := new(randomLB)
	lb.polls = polls
	lb.pollSize = len(polls)
	return lb
}

// randomLB is the loadbalance implementation that picks a Poll at random.
type randomLB struct {
	polls    []Poll // candidates to pick from
	pollSize int    // cached len(polls); set together with polls
}

// LoadBalance reports Random as this balancer's method.
func (b *randomLB) LoadBalance() LoadBalance {
	return Random
}

// Pick returns the poll at an index drawn from fastrand.Intn(b.pollSize).
func (b *randomLB) Pick() (poll Poll) {
	return b.polls[fastrand.Intn(b.pollSize)]
}

// Rebalance replaces the candidate polls and refreshes the cached count.
func (b *randomLB) Rebalance(polls []Poll) {
	b.polls = polls
	b.pollSize = len(polls)
}

// newRoundRobinLB returns a round-robin balancer over polls, caching
// len(polls) as its poll count. The accept counter starts at zero.
func newRoundRobinLB(polls []Poll) loadbalance {
	lb := new(roundRobinLB)
	lb.polls = polls
	lb.pollSize = len(polls)
	return lb
}

// roundRobinLB is the loadbalance implementation that cycles through the
// polls using an atomically incremented counter.
type roundRobinLB struct {
	polls    []Poll  // candidates to pick from
	accepted uintptr // accept counter
	pollSize int     // cached len(polls); set together with polls
}

// LoadBalance reports RoundRobin as this balancer's method.
func (b *roundRobinLB) LoadBalance() LoadBalance {
	return RoundRobin
}

// Pick atomically increments the accept counter and returns the poll at
// counter % pollSize.
//
// The remainder is computed on the unsigned counter. Converting the counter
// to int first makes it negative once it exceeds the largest int (reachable
// on 32-bit platforms after 2^31 picks), and Go's remainder keeps the sign of
// the dividend, so the index would become negative and the lookup would
// panic.
func (b *roundRobinLB) Pick() (poll Poll) {
	next := atomic.AddUintptr(&b.accepted, 1)
	return b.polls[next%uintptr(b.pollSize)]
}

// Rebalance replaces the candidate polls and refreshes the cached count. The
// accept counter is left untouched.
func (b *roundRobinLB) Rebalance(polls []Poll) {
	b.polls = polls
	b.pollSize = len(polls)
}


================================================
FILE: poll_manager.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"fmt"
	"runtime"
	"sync/atomic"
)

// Values of manager.status.
const (
	managerUninitialized = iota // polls need to be (re)built by the next Pick
	managerInitializing         // one goroutine is inside Pick's slow path
	managerInitialized          // Pick may use the fast path
)

// newManager returns a manager configured with the RoundRobin balancer and
// the requested number of loops. Errors from the two setters are discarded,
// so an invalid numLoops leaves the count at zero.
func newManager(numLoops int) *manager {
	m := &manager{}
	_ = m.SetLoadBalance(RoundRobin)
	_ = m.SetNumLoops(numLoops)
	return m
}

// manager owns a set of pollers and the load balancer that picks among them.
// Multiple pollers are used because
// a single poller may not be optimal if the number of cores is large (40C+).
type manager struct {
	numLoops int32
	status   int32       // 0: uninitialized, 1: initializing, 2: initialized
	balance  loadbalance // load balancing method
	polls    []Poll      // all the polls
}

// SetNumLoops will return error when set numLoops < 1
// Otherwise it stores the new count and marks the manager uninitialized, so
// the next Pick adjusts the pollers.
func (m *manager) SetNumLoops(numLoops int) (err error) {
	if numLoops >= 1 {
		// note: set new numLoops first and then change the status
		atomic.StoreInt32(&m.numLoops, int32(numLoops))
		atomic.StoreInt32(&m.status, managerUninitialized)
		return nil
	}
	return fmt.Errorf("set invalid numLoops[%d]", numLoops)
}

// SetLoadBalance set load balance.
// A new balancer over the current polls is installed unless one using the
// same method is already in place. It always returns nil.
func (m *manager) SetLoadBalance(lb LoadBalance) error {
	if current := m.balance; current == nil || current.LoadBalance() != lb {
		m.balance = newLoadbalance(lb, m.polls)
	}
	return nil
}

// Close release all resources.
// Every poller is closed even if an earlier one fails; the first error
// encountered is returned rather than whichever result happened to come
// last. Afterwards the loop count, balancer and poll list are cleared.
func (m *manager) Close() (err error) {
	for _, poll := range m.polls {
		if cerr := poll.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}
	// numLoops is accessed atomically everywhere else in this type
	atomic.StoreInt32(&m.numLoops, 0)
	m.balance = nil
	m.polls = nil
	return err
}

// Run all pollers.
// It adjusts the poll set to the configured number of loops: surplus pollers
// are closed when shrinking, and new pollers are opened and started (each in
// its own goroutine running Wait) when growing. The balancer is then pointed
// at the new set.
//
// If opening a poller fails, the pollers opened earlier in the same call are
// closed before returning; they are not yet part of m.polls, so the deferred
// m.Close() could not reach them. Any returned error also triggers m.Close().
func (m *manager) Run() (err error) {
	defer func() {
		if err != nil {
			_ = m.Close()
		}
	}()

	numLoops := int(atomic.LoadInt32(&m.numLoops))
	if numLoops == len(m.polls) {
		return nil
	}
	polls := make([]Poll, numLoops)
	if numLoops < len(m.polls) {
		// shrink polls
		copy(polls, m.polls[:numLoops])
		for idx := numLoops; idx < len(m.polls); idx++ {
			// close redundant polls
			if err = m.polls[idx].Close(); err != nil {
				logger.Printf("NETPOLL: poller close failed: %v\n", err)
			}
		}
	} else {
		// growth polls
		copy(polls, m.polls)
		for idx := len(m.polls); idx < numLoops; idx++ {
			var poll Poll
			poll, err = openPoll()
			if err != nil {
				// release what this call has opened so far
				for _, opened := range polls[len(m.polls):idx] {
					_ = opened.Close()
				}
				return err
			}
			polls[idx] = poll
			go poll.Wait()
		}
	}
	m.polls = polls

	// LoadBalance must be set before calling Run, otherwise it will panic.
	m.balance.Rebalance(m.polls)
	return nil
}

// Reset closes every current poller (ignoring close errors), drops the poll
// list and rebuilds it through Run.
// This operation is very dangerous; make sure nothing is still using the old
// pollers when calling it.
func (m *manager) Reset() error {
	for i := range m.polls {
		m.polls[i].Close()
	}
	m.polls = nil
	return m.Run()
}

// Pick will select the poller for use each time based on the LoadBalance.
// When the manager is not initialized, exactly one caller wins the
// uninitialized -> initializing transition and runs Run; the others yield and
// retry until the status reads initialized.
func (m *manager) Pick() Poll {
	// fast path: leave the loop as soon as the manager is initialized
	for atomic.LoadInt32(&m.status) != managerInitialized {
		// slow path
		if atomic.CompareAndSwapInt32(&m.status, managerUninitialized, managerInitializing) {
			// adjust polls
			// m.Run() will finish very quickly, so will not many goroutines block on Pick.
			_ = m.Run()

			// A failed swap means SetNumLoops was called during m.Run();
			// the polls will be adjusted by the next Pick.
			atomic.CompareAndSwapInt32(&m.status, managerInitializing, managerInitialized)
			break
		}
		// somebody else holds the initializing state: wait and try again
		runtime.Gosched()
	}
	return m.balance.Pick()
}


================================================
FILE: poll_manager_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"runtime"
	"sync"
	"testing"
)

// TestPollManager sends one message across a connected descriptor pair
// wrapped in two connections, reads it back, closes the writer and then spins
// until both connections report inactive.
func TestPollManager(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	err := rconn.init(&netFD{fd: r}, nil)
	MustNil(t, err)
	err = wconn.init(&netFD{fd: w}, nil)
	MustNil(t, err)

	msg := []byte("hello world")
	n, err := wconn.Write(msg)
	MustNil(t, err)
	Equal(t, n, len(msg))

	p, err := rconn.Reader().Next(n)
	MustNil(t, err)
	Equal(t, string(p), string(msg))

	err = wconn.Close()
	MustNil(t, err)
	// wait for both sides to become inactive after the writer is closed
	for rconn.IsActive() || wconn.IsActive() {
		runtime.Gosched()
	}
}

// TestPollManagerReset resets the package-level poll manager and checks that
// the number of polls afterwards equals the loop count recorded beforehand.
func TestPollManagerReset(t *testing.T) {
	want := int(pollmanager.numLoops)
	MustNil(t, pollmanager.Reset())
	Equal(t, len(pollmanager.polls), want)
}

// TestPollManagerSetNumLoops creates a manager with one loop, picks a poll,
// raises the loop count to 100 and then checks from 32 concurrent goroutines
// that Pick still returns a poll and that the manager holds 100 polls.
// Goroutine counts are only logged; the assertions on them are disabled.
func TestPollManagerSetNumLoops(t *testing.T) {
	pm := newManager(1)

	before := runtime.NumGoroutine()
	first := pm.Pick()
	after := runtime.NumGoroutine()
	Assert(t, first != nil)
	t.Logf("old=%d, new=%d", before, after)
	// FIXME: it's unstable due to background goroutines created by other tests
	// Assert(t, after-before == 1)

	// change pollers
	before = after
	MustNil(t, pm.SetNumLoops(100))
	after = runtime.NumGoroutine()
	t.Logf("old=%d, new=%d", before, after)
	// Assert(t, after == before)

	// trigger polls adjustment
	const pickers = 32
	var picked sync.WaitGroup
	release := make(chan struct{})
	picked.Add(pickers)
	for i := 0; i < pickers; i++ {
		go func() {
			Assert(t, pm.Pick() != nil)
			Assert(t, len(pm.polls) == 100)
			picked.Done()
			<-release // hold goroutines
		}()
	}
	picked.Wait()
	close(release)
}


================================================
FILE: poll_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"runtime"
	"sync"
	"sync/atomic"
	"syscall"
	"testing"
	"time"
)

// Trigger has been validated, but no usage for now.
//
// TestPollTrigger is skipped unconditionally by the t.Skip() call on its first
// line. The rest of the body runs p.Wait() in a goroutine, calls p.Trigger()
// twice with one-millisecond sleeps in between, and finally closes the poll,
// expecting Wait to return a nil error.
//
// NOTE(review): nothing in this function ever writes to trigger, so the
// Equal(t, trigger, 1) and Equal(t, trigger, 2) checks could not pass as
// written if the skip were removed.
func TestPollTrigger(t *testing.T) {
	t.Skip()
	var trigger int
	stop := make(chan error)
	p, err := openDefaultPoll()
	MustNil(t, err)

	// Run the poll loop in the background and hand its result back via stop.
	go func() {
		stop <- p.Wait()
	}()

	time.Sleep(time.Millisecond)
	Equal(t, trigger, 0)
	p.Trigger()
	time.Sleep(time.Millisecond)
	Equal(t, trigger, 1)
	p.Trigger()
	time.Sleep(time.Millisecond)
	Equal(t, trigger, 2)

	// Closing the poll is expected to make Wait return nil.
	p.Close()
	err = <-stop
	MustNil(t, err)
}

// TestPollMod registers both ends of a socket pair on a single poll and checks
// which of the OnRead / OnWrite / OnHup callbacks have fired after each change
// of registration and after the write end is closed. Every callback only
// increments an atomic counter (rn, wn, hn respectively), and the assertions
// compare snapshots of those counters.
func TestPollMod(t *testing.T) {
	var rn, wn, hn int32
	read := func(p Poll) error {
		atomic.AddInt32(&rn, 1)
		return nil
	}
	write := func(p Poll) error {
		atomic.AddInt32(&wn, 1)
		return nil
	}
	hup := func(p Poll) error {
		atomic.AddInt32(&hn, 1)
		return nil
	}
	stop := make(chan error)
	p, err := openDefaultPoll()
	MustNil(t, err)
	// Run the poll loop in the background; its result is collected at the end.
	go func() {
		stop <- p.Wait()
	}()

	rfd, wfd := GetSysFdPairs()
	rop := &FDOperator{FD: rfd, OnRead: read, OnWrite: write, OnHup: hup, poll: p}
	wop := &FDOperator{FD: wfd, OnRead: read, OnWrite: write, OnHup: hup, poll: p}
	var r, w, h int32
	// Before anything is registered no callback may have run.
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w == 0 && h == 0, r, w, h)
	// Registering the read end for readability is expected to fire nothing;
	// this test never writes any data to the pair.
	err = p.Control(rop, PollReadable)
	MustNil(t, err)
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w == 0 && h == 0, r, w, h)

	err = p.Control(wop, PollWritable) // trigger one shot
	MustNil(t, err)
	// Spin until the write callback has been observed at least once.
	for atomic.LoadInt32(&wn) == 0 {
		runtime.Gosched()
	}
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w >= 1 && h == 0, r, w, h)

	err = p.Control(rop, PollR2RW) // trigger write
	MustNil(t, err)
	// Spin until a second write callback has been observed.
	for atomic.LoadInt32(&wn) <= 1 {
		runtime.Gosched()
	}
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w >= 2 && h == 0, r, w, h)

	// close wfd, then trigger hup rfd
	err = syscall.Close(wfd) // trigger hup
	MustNil(t, err)
	for atomic.LoadInt32(&hn) == 0 {
		runtime.Gosched()
	}
	// r is not reloaded here; it still holds the previous snapshot and is only
	// passed along as diagnostic output for the assertion.
	w, h = atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, w >= 2 && h >= 1, r, w, h)

	// Closing the poll is expected to make Wait return nil.
	p.Close()
	err = <-stop
	MustNil(t, err)
}

// TestPollClose checks that closing a poll unblocks a goroutine that is
// sitting in Wait: the test only returns once that goroutine has finished.
func TestPollClose(t *testing.T) {
	p, err := openDefaultPoll()
	MustNil(t, err)

	var waiter sync.WaitGroup
	waiter.Add(1)
	go func() {
		_ = p.Wait()
		waiter.Done()
	}()

	p.Close()
	waiter.Wait()
}

// BenchmarkPollMod measures repeated p.Control(op, PollR2RW) calls on an
// operator that was first registered with PollReadable. Setup errors are
// ignored, as is the result of every Control call.
func BenchmarkPollMod(b *testing.B) {
	b.StopTimer()
	ep, _ := openDefaultPoll()
	rfd, _ := GetSysFdPairs()
	op := &FDOperator{FD: rfd}
	ep.Control(op, PollReadable)

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for iter := 0; iter < b.N; iter++ {
		ep.Control(op, PollR2RW)
	}
}


================================================
FILE: sys_epoll_linux.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !arm64 && !loong64
// +build !arm64,!loong64

package netpoll

import (
	"syscall"
	"unsafe"
)

// EPOLLET is this package's edge-triggered epoll flag for the architectures
// this file is built for (everything except arm64 and loong64, per the build
// constraint above). It is defined as the negation of syscall.EPOLLET.
// NOTE(review): presumably syscall.EPOLLET is declared as a negative constant
// on these architectures, so the negation yields the positive flag bit —
// confirm against the syscall package.
const EPOLLET = -syscall.EPOLLET

// epollevent is the event record handed to EpollCtl and filled in by
// EpollWait; both pass its address straight to the kernel.
// NOTE(review): the field layout (no padding between events and data)
// presumably mirrors the kernel's struct epoll_event on the architectures
// this file is built for — confirm before changing any field.
type epollevent struct {
	events uint32
	data   [8]byte // unaligned uintptr
}

// EpollCreate implements epoll_create1.
//
// flag is passed to the kernel unchanged. The raw syscall result is returned
// as fd; err is nil when the syscall reported errno 0 and the errno otherwise.
func EpollCreate(flag int) (fd int, err error) {
	ret, _, errno := syscall.RawSyscall(syscall.SYS_EPOLL_CREATE1, uintptr(flag), 0, 0)
	if errno != 0 {
		return int(ret), errno
	}
	return int(ret), nil
}

// EpollCtl implements epoll_ctl.
//
// epfd, op and fd are forwarded unchanged and event is passed by address.
// The result is nil when the syscall reported errno 0 and the errno otherwise.
func EpollCtl(epfd, op, fd int, event *epollevent) (err error) {
	_, _, errno := syscall.RawSyscall6(syscall.SYS_EPOLL_CTL, uintptr(epfd), uintptr(op), uintptr(fd), uintptr(unsafe.Pointer(event)), 0, 0)
	if errno != 0 {
		return errno
	}
	return nil
}

// EpollWait implements epoll_wait.
//
// events receives the ready events; its length is passed to the kernel as the
// maximum number of events to return. msec is the timeout handed to the
// kernel. n is the raw syscall result and err is nil when the syscall reported
// errno 0.
//
// An empty events slice is rejected with (-1, syscall.EINVAL) instead of
// panicking on &events[0]; EINVAL is what epoll_wait itself reports when
// maxevents is not greater than zero.
func EpollWait(epfd int, events []epollevent, msec int) (n int, err error) {
	if len(events) == 0 {
		return -1, syscall.EINVAL
	}
	var r0 uintptr
	_p0 := unsafe.Pointer(&events[0])
	if msec == 0 {
		// A zero timeout goes through RawSyscall6, every other timeout
		// through Syscall6.
		r0, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_WAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), 0, 0, 0)
	} else {
		r0, _, err = syscall.Syscall6(syscall.SYS_EPOLL_WAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), uintptr(msec), 0, 0)
	}
	// The syscall helpers always return an Errno value; map "no error" to nil.
	if err == syscall.Errno(0) {
		err = nil
	}
	return int(r0), err
}


================================================
FILE: sys_epoll_linux_arm64.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"syscall"
	"unsafe"
)

// EPOLLET is this package's edge-triggered epoll flag; in this file it is
// syscall.EPOLLET unchanged.
const EPOLLET = syscall.EPOLLET

// epollevent is the event record handed to EpollCtl and filled in by
// EpollWait; both pass its address straight to the kernel.
// NOTE(review): the blank int32 field presumably pads the struct so that data
// sits at the offset the kernel's struct epoll_event uses on this
// architecture — confirm before changing any field.
type epollevent struct {
	events uint32
	_      int32
	data   [8]byte // unaligned uintptr
}

// EpollCreate implements epoll_create1.
//
// flag is handed to the kernel as-is. fd carries the raw syscall result, and
// err is non-nil only when the syscall reported a non-zero errno.
func EpollCreate(flag int) (fd int, err error) {
	res, _, errno := syscall.RawSyscall(syscall.SYS_EPOLL_CREATE1, uintptr(flag), 0, 0)
	fd = int(res)
	if errno != 0 {
		err = errno
	}
	return fd, err
}

// EpollCtl implements epoll_ctl.
//
// All integer arguments are forwarded unchanged and event is passed by
// address. err is non-nil only when the syscall reported a non-zero errno.
func EpollCtl(epfd int, op int, fd int, event *epollevent) (err error) {
	_, _, errno := syscall.RawSyscall6(syscall.SYS_EPOLL_CTL, uintptr(epfd), uintptr(op), uintptr(fd), uintptr(unsafe.Pointer(event)), 0, 0)
	if errno != 0 {
		err = errno
	}
	return err
}

// EpollWait implements epoll_wait.
//
// This variant issues SYS_EPOLL_PWAIT with the trailing arguments left at
// zero. events receives the ready events; its length is passed to the kernel
// as the maximum number of events to return. msec is the timeout handed to the
// kernel. n is the raw syscall result and err is nil when the syscall reported
// errno 0.
//
// An empty events slice is rejected with (-1, syscall.EINVAL) instead of
// panicking on &events[0]; EINVAL is what the kernel itself reports when
// maxevents is not greater than zero.
func EpollWait(epfd int, events []epollevent, msec int) (n int, err error) {
	if len(events) == 0 {
		return -1, syscall.EINVAL
	}
	var r0 uintptr
	_p0 := unsafe.Pointer(&events[0])
	if msec == 0 {
		// A zero timeout goes through RawSyscall6, every other timeout
		// through Syscall6.
		r0, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), 0, 0, 0)
	} else {
		r0, _, err = syscall.Syscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), uintptr(msec), 0, 0)
	}
	// The syscall helpers always return an Errno value; map "no error" to nil.
	if err == syscall.Errno(0) {
		err = nil
	}
	return int(r0), err
}


================================================
FILE: sys_epoll_linux_loong64.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux && loong64
// +build linux,loong64

package netpoll

import (
	"syscall"
	"unsafe"
)

// EPOLLET is this package's edge-triggered epoll flag; on linux/loong64 it is
// syscall.EPOLLET unchanged.
const EPOLLET = syscall.EPOLLET

// epollevent is the event record handed to EpollCtl and filled in by
// EpollWait; both pass its address straight to the kernel.
// NOTE(review): the blank int32 field presumably pads the struct so that data
// sits at the offset the kernel's struct epoll_event uses on loong64 —
// confirm before changing any field.
type epollevent struct {
	events uint32
	_      int32
	data   [8]byte // unaligned uintptr
}

// EpollCreate implements epoll_create1.
//
// The flag value reaches the kernel untouched. The first result is the raw
// syscall return value; the second is nil unless errno was non-zero.
func EpollCreate(flag int) (fd int, err error) {
	raw, _, errno := syscall.RawSyscall(syscall.SYS_EPOLL_CREATE1, uintptr(flag), 0, 0)
	switch errno {
	case 0:
		return int(raw), nil
	default:
		return int(raw), errno
	}
}

// EpollCtl implements epoll_ctl.
//
// The arguments are forwarded to the kernel unchanged, with event passed by
// address. The returned error is nil unless errno was non-zero.
func EpollCtl(epfd int, op int, fd int, event *epollevent) (err error) {
	evPtr := uintptr(unsafe.Pointer(event))
	_, _, errno := syscall.RawSyscall6(syscall.SYS_EPOLL_CTL, uintptr(epfd), uintptr(op), uintptr(fd), evPtr, 0, 0)
	if errno == 0 {
		return nil
	}
	return errno
}

// EpollWait implements epoll_wait.
//
// This variant issues SYS_EPOLL_PWAIT with the trailing arguments left at
// zero. events receives the ready events; its length is passed to the kernel
// as the maximum number of events to return. msec is the timeout handed to the
// kernel. n is the raw syscall result and err is nil when the syscall reported
// errno 0.
//
// An empty events slice is rejected with (-1, syscall.EINVAL) instead of
// panicking on &events[0]; EINVAL is what the kernel itself reports when
// maxevents is not greater than zero.
func EpollWait(epfd int, events []epollevent, msec int) (n int, err error) {
	if len(events) == 0 {
		return -1, syscall.EINVAL
	}
	var r0 uintptr
	_p0 := unsafe.Pointer(&events[0])
	if msec == 0 {
		// A zero timeout goes through RawSyscall6, every other timeout
		// through Syscall6.
		r0, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), 0, 0, 0)
	} else {
		r0, _, err = syscall.Syscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), uintptr(msec), 0, 0)
	}
	// The syscall helpers always return an Errno value; map "no error" to nil.
	if err == syscall.Errno(0) {
		err = nil
	}
	return int(r0), err
}


================================================
FILE: sys_exec.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"math"
	"os"
	"syscall"
	"unsafe"
)

// GetSysFdPairs creates and returns the fds of a pair of sockets.
//
// The pair is an AF_UNIX / SOCK_STREAM socketpair. The signature has no error
// result, so a failed socketpair call is reported by returning (-1, -1).
// Without that check the zero-valued result (0, 0) would be handed back, and
// callers would unknowingly operate on file descriptor 0.
func GetSysFdPairs() (r, w int) {
	fds, err := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0)
	if err != nil {
		return -1, -1
	}
	return fds[0], fds[1]
}

// setTCPNoDelay sets the TCP_NODELAY option on socket fd to 1 when b is true
// and to 0 otherwise, returning the setsockopt error unchanged.
func setTCPNoDelay(fd int, b bool) (err error) {
	flag := boolint(b)
	err = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY, flag)
	return err
}

// sysSocket wraps the socket system call and marks the returned file
// descriptor as close-on-exec and nonblocking.
//
// On failure it returns -1 together with an *os.SyscallError naming the step
// that failed ("socket" or "setnonblock"); a descriptor that could not be made
// nonblocking is closed before returning.
func sysSocket(family, sotype, proto int) (int, error) {
	// The descriptor is created and flagged close-on-exec while holding the
	// read side of syscall.ForkLock.
	// See ../syscall/exec_unix.go for description of ForkLock.
	sock, sockErr := func() (int, error) {
		syscall.ForkLock.RLock()
		defer syscall.ForkLock.RUnlock()
		fd, e := syscall.Socket(family, sotype, proto)
		if e != nil {
			return fd, e
		}
		syscall.CloseOnExec(fd)
		return fd, nil
	}()
	if sockErr != nil {
		return -1, os.NewSyscallError("socket", sockErr)
	}

	if nbErr := syscall.SetNonblock(sock, true); nbErr != nil {
		syscall.Close(sock)
		return -1, os.NewSyscallError("setnonblock", nbErr)
	}
	return sock, nil
}

// barriercap is a fixed capacity of 32 associated with barrier.
// NOTE(review): it is not referenced in this part of the file; presumably it
// is the number of byte slices / iovec slots allocated per barrier — confirm
// at the allocation site.
const barriercap = 32

// barrier pairs a list of byte slices with the iovec array used to describe
// them to the kernel; the two fields are what writev, readv and sendmsg take
// as their bs and ivs arguments.
type barrier struct {
	bs  [][]byte
	ivs []syscall.Iovec
}

// writev wraps the writev system call.
//
// The non-empty slices of bs are described in ivs and written to fd with a
// single call. Afterwards every entry of bs and the Base of every used iovec
// is cleared, whether or not the call succeeded. It returns (0, nil) without
// a syscall when bs holds no data; otherwise it returns the raw syscall
// result and the errno, if any.
func writev(fd int, bs [][]byte, ivs []syscall.Iovec) (n int, err error) {
	used := iovecs(bs, ivs)
	if used == 0 {
		return 0, nil
	}
	// syscall
	ret, _, errno := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&ivs[0])), uintptr(used))
	resetIovecs(bs, ivs[:used])
	n = int(ret)
	if errno != 0 {
		err = errno
	}
	return n, err
}

// readv wraps the readv system call.
// return 0, nil means EOF.
//
// The non-empty slices of bs are described in ivs and filled from fd with a
// single call. Afterwards every entry of bs and the Base of every used iovec
// is cleared, whether or not the call succeeded. (0, nil) is also returned,
// without a syscall, when bs holds no non-empty slice.
func readv(fd int, bs [][]byte, ivs []syscall.Iovec) (n int, err error) {
	used := iovecs(bs, ivs)
	if used == 0 {
		return 0, nil
	}
	// syscall
	ret, _, errno := syscall.RawSyscall(syscall.SYS_READV, uintptr(fd), uintptr(unsafe.Pointer(&ivs[0])), uintptr(used))
	resetIovecs(bs, ivs[:used])
	n = int(ret)
	if errno != 0 {
		err = errno
	}
	return n, err
}

// TODO: read from sysconf(_SC_IOV_MAX)? The Linux default is
//
//	1024 and this seems conservative enough for now. Darwin's
//	UIO_MAXIOV also seems to be 1024.
//
// iovecs fills ivs with one entry per non-empty slice of bs and returns the
// number of entries written. Empty slices are skipped.
//
// iovecs limit length to 2GB(2^31): once the running total reaches
// math.MaxInt32 the current entry is shortened so the total is exactly
// math.MaxInt32 and filling stops there.
//
// Filling also stops once ivs is full, so passing fewer iovec slots than
// non-empty slices describes only the leading slices instead of panicking
// with an index out of range.
func iovecs(bs [][]byte, ivs []syscall.Iovec) (iovLen int) {
	totalLen := 0
	for _, chunk := range bs {
		l := len(chunk)
		if l == 0 {
			continue
		}
		if iovLen == len(ivs) {
			// No slot left for this chunk; the caller learns how much was
			// covered from the byte count of the following syscall.
			break
		}
		ivs[iovLen].Base = &chunk[0]
		totalLen += l
		if totalLen >= math.MaxInt32 {
			// Trim the last entry so the total does not exceed the cap.
			newLen := math.MaxInt32 - totalLen + l
			ivs[iovLen].SetLen(newLen)
			iovLen++
			return iovLen
		}
		ivs[iovLen].SetLen(l)
		iovLen++
	}

	return iovLen
}

// resetIovecs drops the references held by a finished vectored I/O call: every
// entry of bs is set to nil and the Base pointer of every entry of ivs is
// cleared. The Len fields of ivs are left as they are.
func resetIovecs(bs [][]byte, ivs []syscall.Iovec) {
	for idx := range bs {
		bs[idx] = nil
	}
	for idx := range ivs {
		ivs[idx].Base = nil
	}
}

// boolint converts a boolean to an int: 1 for true, 0 for false.
func boolint(b bool) int {
	n := 0
	if b {
		n = 1
	}
	return n
}


================================================
FILE: sys_exec_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"math"
	"syscall"
	"testing"
)

func TestIovecs(t *testing.T) {
	var got int
	var bs [][]byte
	ivs := make([]syscall.Iovec, 4)

	// case 1
	bs = [][]byte{
		make([]byte, 10),
		make([]byte, 20),
		make([]byte, 30),
		make([]byte, 40),
	}
	got = iovecs(bs, ivs)
	Equal(t, got, 4)
	Equal(t, int(ivs[0].Len), 10)
	Equal(t, int(ivs[1].Len), 20)
	Equal(t, int(ivs[2].Len), 30)
	Equal(t, int(ivs[3].Len), 40)

	// case 2
	resetIovecs(bs, ivs)
	bs = [][]byte{
		make([]byte, math.MaxInt32+100),
		make([]byte, 20),
		make([]byte, 30),
		make([]byte, 40),
	}
	got = iovecs(bs, ivs)
	Equal(t, got, 1)
	Equal(t, int(ivs[0].Len), math.MaxInt32)
	Assert(t, ivs[1].Base == nil)
	Assert(t, ivs[2].Base == nil)
	Assert(t, ivs[3].Base == nil)

	// case 3
	resetIovecs(bs, ivs)
	bs = [][]byte{
		make([]byte, 10),
		make([]byte, 20),
		make([]byte, math.MaxInt32+100),
		make([]byte, 40),
	}
	got = iovecs(bs, ivs)
	Equal(t, got, 3)
	Equal(t, int(ivs[0].Len), 10)
	Equal(t, int(ivs[1].Len), 20)
	Equal(t, int(ivs[2].Len), math.MaxInt32-30)
	Assert(t, ivs[3].Base == nil)
}

// TestWritev sends four slices (the first one empty) through writev on one end
// of a socket pair and checks that all 31 payload bytes arrive on the other.
func TestWritev(t *testing.T) {
	rfd, wfd := GetSysFdPairs()
	chunks := [][]byte{
		[]byte(""),            // len=0
		[]byte("first line"),  // len=10
		[]byte("second line"), // len=11
		[]byte("third line"),  // len=10
	}
	bar := barrier{bs: chunks}
	bar.ivs = make([]syscall.Iovec, len(bar.bs))

	wrote, err := writev(wfd, bar.bs, bar.ivs)
	MustNil(t, err)
	Equal(t, wrote, 31)

	buf := make([]byte, 50)
	got, err := syscall.Read(rfd, buf)
	MustNil(t, err)
	Equal(t, got, 31)
	t.Logf("READ %s", buf[:got])
}

// TestReadv writes three lines (31 bytes in total) into one end of a socket
// pair and reads them back on the other end with a single readv into four
// buffers, the first of which is empty.
func TestReadv(t *testing.T) {
	rfd, wfd := GetSysFdPairs()
	lines := [][]byte{
		[]byte("first line"),  // len=10
		[]byte("second line"), // len=11
		[]byte("third line"),  // len=10
	}
	sent := 0
	for _, line := range lines {
		n, _ := syscall.Write(wfd, line)
		sent += n
	}
	Equal(t, sent, 31)

	// dst keeps its own references to the receive buffers, because readv
	// clears the slice list it is handed.
	dst := [][]byte{
		make([]byte, 0),
		make([]byte, 10),
		make([]byte, 11),
		make([]byte, 10),
	}
	bar := barrier{bs: make([][]byte, 4)}
	copy(bar.bs, dst)
	bar.ivs = make([]syscall.Iovec, len(bar.bs))

	got, err := readv(rfd, bar.bs, bar.ivs)
	MustNil(t, err)
	Equal(t, got, 31)
	for i, v := range dst {
		t.Logf("READ [%d] %s", i, v)
	}
}

// TestSendmsg sends four slices (the first one empty) through sendmsg on one
// end of a socket pair, with zerocopy disabled, and checks that all 31 payload
// bytes arrive on the other end.
func TestSendmsg(t *testing.T) {
	rfd, wfd := GetSysFdPairs()
	chunks := [][]byte{
		[]byte(""),            // len=0
		[]byte("first line"),  // len=10
		[]byte("second line"), // len=11
		[]byte("third line"),  // len=10
	}
	bar := barrier{bs: chunks}
	bar.ivs = make([]syscall.Iovec, len(bar.bs))

	wrote, err := sendmsg(wfd, bar.bs, bar.ivs, false)
	MustNil(t, err)
	Equal(t, wrote, 31)

	buf := make([]byte, 50)
	got, err := syscall.Read(rfd, buf)
	MustNil(t, err)
	Equal(t, got, 31)
	t.Logf("READ %s", buf[:got])
}

// BenchmarkWrite measures the baseline for the vectored-write benchmarks: each
// iteration allocates one buffer, copies the message into it size times and
// sends it with a single syscall.Write. A background goroutine keeps draining
// the other end of the socket pair and is never stopped.
func BenchmarkWrite(b *testing.B) {
	b.StopTimer()
	rfd, wfd := GetSysFdPairs()
	message := "hello, world!"
	size := 5

	go func() {
		sink := make([]byte, 13)
		for {
			syscall.Read(rfd, sink)
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for iter := 0; iter < b.N; iter++ {
		out := make([]byte, len(message)*size)
		off := 0
		for rep := 0; rep < size; rep++ {
			off += copy(out[off:], message)
		}
		syscall.Write(wfd, out)
	}
}

// BenchmarkWritev measures sending size message-sized slices with one writev
// call per iteration. A background goroutine keeps draining the other end of
// the socket pair and is never stopped.
//
// writev clears every entry of the slice list it is given (via resetIovecs),
// so the list has to be refilled before each call. Without that, only the
// first iteration would perform a syscall and every later one would return
// immediately with nothing to send.
func BenchmarkWritev(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	// chunks owns the payload; barrier.bs is only a scratch list of views.
	chunks := make([][]byte, size)
	for i := range chunks {
		chunks[i] = []byte(message)
	}
	barrier := barrier{}
	barrier.bs = make([][]byte, size)
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))

	go func() {
		buffer := make([]byte, 13)
		for {
			syscall.Read(r, buffer)
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		copy(barrier.bs, chunks) // restore what the previous call cleared
		writev(w, barrier.bs, barrier.ivs)
	}
}

// BenchmarkSendmsg measures sending size message-sized slices with one sendmsg
// call per iteration (zerocopy disabled). A background goroutine keeps
// draining the other end of the socket pair and is never stopped.
//
// sendmsg clears every entry of the slice list it is given (via resetIovecs),
// so the list has to be refilled before each call. Without that, only the
// first iteration would perform a syscall and every later one would return
// immediately with nothing to send.
func BenchmarkSendmsg(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	// chunks owns the payload; barrier.bs is only a scratch list of views.
	chunks := make([][]byte, size)
	for i := range chunks {
		chunks[i] = []byte(message)
	}
	barrier := barrier{}
	barrier.bs = make([][]byte, size)
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))

	go func() {
		buffer := make([]byte, 13)
		for {
			syscall.Read(r, buffer)
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		copy(barrier.bs, chunks) // restore what the previous call cleared
		sendmsg(w, barrier.bs, barrier.ivs, false)
	}
}

// BenchmarkRead measures the baseline for the vectored-read benchmark: each
// iteration allocates one buffer of size*len(message) bytes and fills it with
// a single syscall.Read. A background goroutine keeps writing the repeated
// message into the other end of the socket pair and is never stopped.
func BenchmarkRead(b *testing.B) {
	b.StopTimer()
	rfd, wfd := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	payload := make([]byte, size*len(message))
	off := 0
	for rep := 0; rep < size; rep++ {
		off += copy(payload[off:], message)
	}

	go func() {
		for {
			syscall.Write(wfd, payload)
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for iter := 0; iter < b.N; iter++ {
		in := make([]byte, size*len(message))
		syscall.Read(rfd, in)
	}
}

// BenchmarkReadv measures filling size message-sized buffers with one readv
// call per iteration. A background goroutine keeps writing the message into
// the other end of the socket pair and is never stopped.
//
// readv clears every entry of the slice list it is given (via resetIovecs),
// so the list has to be refilled before each call. Without that, only the
// first iteration would perform a syscall and every later one would return
// immediately with nothing to read into.
func BenchmarkReadv(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	// buffers owns the receive storage; barrier.bs is only a scratch list of
	// views onto it.
	buffers := make([][]byte, size)
	for i := range buffers {
		buffers[i] = make([]byte, len(message))
	}
	barrier := barrier{}
	barrier.bs = make([][]byte, size)
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))

	go func() {
		for {
			writeAll(w, []byte(message))
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		copy(barrier.bs, buffers) // restore what the previous call cleared
		readv(r, barrier.bs, barrier.ivs)
	}
}


================================================
FILE: sys_keepalive_darwin.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import "syscall"

// SetKeepAlive sets the keepalive for the connection.
//
// It enables SO_KEEPALIVE on fd, then applies secs to TCP option 0x101 and to
// TCP_KEEPALIVE. The first error encountered is returned, except that
// ENOPROTOOPT from option 0x101 is tolerated.
// NOTE(review): 0x101 is presumably Darwin's TCP_KEEPINTVL, which the syscall
// package does not export — confirm against the system headers.
func SetKeepAlive(fd, secs int) error {
	if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_KEEPALIVE, 1); err != nil {
		return err
	}
	// OS X 10.7 and earlier don't support this option, so ENOPROTOOPT is not
	// treated as a failure.
	err := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, 0x101, secs)
	if err != nil && err != syscall.ENOPROTOOPT {
		return err
	}
	return syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPALIVE, secs)
}


================================================
FILE: sys_keepalive_openbsd.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

// SetKeepAlive sets the keepalive for the connection.
//
// In this file it is a no-op that ignores both fd and secs and always
// returns nil.
func SetKeepAlive(fd, secs int) error {
	// OpenBSD has no user-settable per-socket TCP keepalive options.
	return nil
}


================================================
FILE: sys_keepalive_unix.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build netbsd || freebsd || dragonfly || linux
// +build netbsd freebsd dragonfly linux

package netpoll

import "syscall"

// SetKeepAlive enables TCP keepalive on fd and applies secs to both the probe
// interval (TCP_KEEPINTVL) and the idle time before the first probe
// (TCP_KEEPIDLE). The options are set in that order and the first error stops
// the sequence and is returned.
//
// The original note on this function reads "just support ipv4".
// The probe count (tcp_keepalive_probes, TCP_KEEPCNT) is deliberately left
// untouched.
func SetKeepAlive(fd, secs int) error {
	steps := [...]struct{ level, opt, val int }{
		{syscall.SOL_SOCKET, syscall.SO_KEEPALIVE, 1},      // open keep-alive
		{syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, secs}, // tcp_keepalive_intvl
		{syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, secs},  // tcp_keepalive_time
	}
	for _, s := range steps {
		if err := syscall.SetsockoptInt(fd, s.level, s.opt, s.val); err != nil {
			return err
		}
	}
	return nil
}


================================================
FILE: sys_sendmsg_bsd.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || dragonfly || freebsd || netbsd || openbsd
// +build darwin dragonfly freebsd netbsd openbsd

package netpoll

import (
	"syscall"
	"unsafe"
)

// sendmsg wraps the sendmsg system call.
//
// The non-empty slices of bs are described in ivs and sent on fd as a single
// message with flags 0. ivs must have room for every non-empty slice of bs.
// Afterwards every entry of bs and the Base of every used iovec is cleared,
// whether or not the call succeeded. zerocopy is accepted but not used here.
// It returns (0, nil) without a syscall when bs holds no data; otherwise it
// returns the raw syscall result and the errno, if any.
func sendmsg(fd int, bs [][]byte, ivs []syscall.Iovec, zerocopy bool) (n int, err error) {
	used := iovecs(bs, ivs)
	if used == 0 {
		return 0, nil
	}
	var hdr syscall.Msghdr
	hdr.Iov = &ivs[0]
	hdr.Iovlen = int32(used)
	// flags = syscall.MSG_DONTWAIT
	ret, _, errno := syscall.RawSyscall(syscall.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&hdr)), uintptr(0))
	resetIovecs(bs, ivs[:used])
	n = int(ret)
	if errno != 0 {
		err = errno
	}
	return n, err
}


================================================
FILE: sys_sendmsg_linux.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"syscall"
	"unsafe"
)

//func init() {
//	err := syscall.Setrlimit(8, &syscall.Rlimit{
//		Cur: 0xffffffff,
//		Max: 0xffffffff,
//	})
//	if err != nil {
//		panic(err)
//	}
//}

// sendmsg wraps the sendmsg system call.
//
// The non-empty slices of bs are described in ivs and sent on fd as a single
// message with flags 0. ivs must have room for every non-empty slice of bs.
// Afterwards every entry of bs and the Base of every used iovec is cleared,
// whether or not the call succeeded. zerocopy is accepted but not used here.
// It returns (0, nil) without a syscall when bs holds no data; otherwise it
// returns the raw syscall result and the errno, if any.
func sendmsg(fd int, bs [][]byte, ivs []syscall.Iovec, zerocopy bool) (n int, err error) {
	used := iovecs(bs, ivs)
	if used == 0 {
		return 0, nil
	}
	var hdr syscall.Msghdr
	hdr.Iov = &ivs[0]
	hdr.Iovlen = uint64(used)
	ret, _, errno := syscall.RawSyscall(syscall.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&hdr)), 0)
	resetIovecs(bs, ivs[:used])
	n = int(ret)
	if errno != 0 {
		err = errno
	}
	return n, err
}


================================================
FILE: sys_sockopt_bsd.go
================================================
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”).
// All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors.

//go:build darwin || dragonfly || freebsd || netbsd || openbsd
// +build darwin dragonfly freebsd netbsd openbsd

package netpoll

import (
	"os"
	"runtime"
	"syscall"
)

// setDefaultSockopts applies this package's default options to socket s.
//
// On DragonFly BSD, for non-raw sockets, it first requests the high ephemeral
// port range for the socket's address family; the result of that call is
// ignored. It then enables SO_BROADCAST and returns that call's error wrapped
// as a "setsockopt" syscall error (nil on success). ipv6only is not used in
// this variant.
func setDefaultSockopts(s, family, sotype int, ipv6only bool) error {
	onDragonfly := runtime.GOOS == "dragonfly"
	if onDragonfly && sotype != syscall.SOCK_RAW {
		// On DragonFly BSD, we adjust the ephemeral port
		// range because unlike other BSD systems its default
		// port range doesn't conform to IANA recommendation
		// as described in RFC 6056 and is pretty narrow.
		if family == syscall.AF_INET {
			syscall.SetsockoptInt(s, syscall.IPPROTO_IP, syscall.IP_PORTRANGE, syscall.IP_PORTRANGE_HIGH)
		} else if family == syscall.AF_INET6 {
			syscall.SetsockoptInt(s, syscall.IPPROTO_IPV6, syscall.IPV6_PORTRANGE, syscall.IPV6_PORTRANGE_HIGH)
		}
	}
	// Allow broadcast.
	bcastErr := syscall.SetsockoptInt(s, syscall.SOL_SOCKET, syscall.SO_BROADCAST, 1)
	return os.NewSyscallError("setsockopt", bcastErr)
}


================================================
FILE: sys_sockopt_linux.go
================================================
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”).
// All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors.

package netpoll

import (
	"os"
	"syscall"
)

// setDefaultSockopts applies this package's default options to socket s.
//
// For non-raw AF_INET6 sockets it first sets IPV6_V6ONLY according to
// ipv6only; the result of that call is ignored. It then enables SO_BROADCAST
// and returns that call's error wrapped as a "setsockopt" syscall error (nil
// on success).
func setDefaultSockopts(s, family, sotype int, ipv6only bool) error {
	v6NonRaw := family == syscall.AF_INET6 && sotype != syscall.SOCK_RAW
	if v6NonRaw {
		// Allow both IP versions even if the OS default
		// is otherwise. Note that some operating systems
		// never admit this option.
		_ = syscall.SetsockoptInt(s, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, boolint(ipv6only))
	}

	// Allow broadcast.
	bcastErr := syscall.SetsockoptInt(s, syscall.SOL_SOCKET, syscall.SO_BROADCAST, 1)
	return os.NewSyscallError("setsockopt", bcastErr)
}


================================================
FILE: test_conns.sh
================================================
#!/usr/bin/env bash
#
# Opens <conns> connections to <ip>:<port> in parallel using nc, each with
# stdin redirected from /dev/null and <timeout> passed to nc's -w option,
# then waits for all of them to finish.
#
# Usage: test_conns.sh <ip> <port> <conns> <timeout>

if [ "$#" -lt 4 ]; then
  echo "usage: $0 <ip> <port> <conns> <timeout>" >&2
  exit 1
fi

ip="$1"
port="$2"
conns="$3"
timeout="$4"

# Quote every expansion so empty or unusual arguments cannot be word-split
# into extra nc arguments.
for i in $(seq 1 "$conns");
do
  nc -v -w "$timeout" "$ip" "$port" < /dev/null &
done

wait