Repository: cloudwego/netpoll Branch: main Commit: fcc5e9d814c8 Files: 92 Total size: 356.8 KB Directory structure: gitextract_ncyzmgn6/ ├── .github/ │ ├── CODEOWNERS │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.md │ │ └── feature_request.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ └── pr-check.yml ├── .gitignore ├── .golangci.yaml ├── .licenserc.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── CREDITS ├── LICENSE ├── NOTICE ├── README.md ├── README_CN.md ├── _typos.toml ├── connection.go ├── connection_errors.go ├── connection_errors_test.go ├── connection_impl.go ├── connection_lock.go ├── connection_onevent.go ├── connection_reactor.go ├── connection_test.go ├── docs/ │ ├── guide/ │ │ ├── guide_cn.md │ │ └── guide_en.md │ └── reference/ │ ├── design_cn.md │ ├── design_en.md │ └── explain.md ├── eventloop.go ├── fd_operator.go ├── fd_operator_cache.go ├── fd_operator_cache_test.go ├── go.mod ├── go.sum ├── internal/ │ └── runner/ │ ├── runner.go │ └── runner_test.go ├── lint.sh ├── mux/ │ ├── mux_test.go │ ├── shard_queue.go │ └── shard_queue_test.go ├── net_dialer.go ├── net_dialer_test.go ├── net_io.go ├── net_listener.go ├── net_listener_test.go ├── net_netfd.go ├── net_netfd_conn.go ├── net_polldesc.go ├── net_polldesc_test.go ├── net_sock.go ├── net_tcpsock.go ├── net_unixsock.go ├── netpoll_config.go ├── netpoll_options.go ├── netpoll_server.go ├── netpoll_unix.go ├── netpoll_unix_test.go ├── netpoll_windows.go ├── nocopy.go ├── nocopy_linkbuffer.go ├── nocopy_linkbuffer_norace.go ├── nocopy_linkbuffer_race.go ├── nocopy_linkbuffer_test.go ├── nocopy_readwriter.go ├── nocopy_readwriter_test.go ├── poll.go ├── poll_default.go ├── poll_default_bsd.go ├── poll_default_bsd_norace.go ├── poll_default_bsd_race.go ├── poll_default_linux.go ├── poll_default_linux_norace.go ├── poll_default_linux_race.go ├── poll_default_linux_test.go ├── poll_loadbalance.go ├── poll_manager.go ├── poll_manager_test.go ├── poll_test.go ├── sys_epoll_linux.go ├── 
sys_epoll_linux_arm64.go ├── sys_epoll_linux_loong64.go ├── sys_exec.go ├── sys_exec_test.go ├── sys_keepalive_darwin.go ├── sys_keepalive_openbsd.go ├── sys_keepalive_unix.go ├── sys_sendmsg_bsd.go ├── sys_sendmsg_linux.go ├── sys_sockopt_bsd.go ├── sys_sockopt_linux.go └── test_conns.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/CODEOWNERS ================================================ # For more information, please refer to https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners * @cloudwego/netpoll-reviewers @cloudwego/netpoll-approvers @cloudwego/netpoll-maintainers ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- **Describe the bug** A clear and concise description of what the bug is. **To Reproduce** Steps to reproduce the behavior: 1. Go to '...' 2. Click on '....' 3. Scroll down to '....' 4. See error **Expected behavior** A clear and concise description of what you expected to happen. **Screenshots** If applicable, add screenshots to help explain your problem. **Desktop (please complete the following information):** - OS: [e.g. iOS] - Browser [e.g. chrome, safari] - Version [e.g. 22] **Smartphone (please complete the following information):** - Device: [e.g. iPhone6] - OS: [e.g. iOS8.1] - Browser [e.g. stock browser, safari] - Version [e.g. 22] **Additional context** Add any other context about the problem here. 
================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.md ================================================ --- name: Feature request about: Suggest an idea for this project title: '' labels: '' assignees: '' --- **Is your feature request related to a problem? Please describe.** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] **Describe the solution you'd like** A clear and concise description of what you want to happen. **Describe alternatives you've considered** A clear and concise description of any alternative solutions or features you've considered. **Additional context** Add any other context or screenshots about the feature request here. ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ================================================ FILE: .github/workflows/pr-check.yml ================================================ name: Push and Pull Request Check on: [ push, pull_request ] jobs: compatibility-test: strategy: matrix: go: [ 1.18, 1.24 ] os: [ ubuntu-latest, ubuntu-24.04-arm, macos-latest ] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v5 with: go-version: ${{ matrix.go }} - name: Unit Test run: go test -timeout=2m -race ./... - name: Benchmark run: go test -bench=. -benchmem -run=none ./... -benchtime=100ms windows-test: runs-on: windows-latest steps: - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v5 with: go-version: stable - name: Build Test run: go vet ./... 
compliant: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Check License Header uses: apache/skywalking-eyes/header@v0.4.0 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Check Spell uses: crate-ci/typos@v1.13.14 golangci-lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Go uses: actions/setup-go@v5 with: go-version: stable # for self-hosted, the cache path is shared across projects # and it works well without the cache of github actions # Enable it if we're going to use Github only cache: false - name: Golangci Lint # https://golangci-lint.run/ uses: golangci/golangci-lint-action@v6 with: version: latest only-new-issues: true ================================================ FILE: .gitignore ================================================ # Binaries for programs and plugins *.exe *.exe~ *.dll *.so *.dylib # Test binary, built with `go test -c` *.test # Output of the go coverage tool, specifically when used with LiteIDE *.out # Dependency directories (remove the comment below to include it) # vendor/ .idea/ ================================================ FILE: .golangci.yaml ================================================ # Options for analysis running. run: timeout: 3m linters: # https://golangci-lint.run/usage/linters/ disable-all: true enable: - gosimple - govet - ineffassign - staticcheck - unused - unconvert - goimports - gofumpt # Refer to https://golangci-lint.run/usage/linters linters-settings: gofumpt: # Choose whether to use the extra rules. # Default: false extra-rules: true goimports: # Put imports beginning with prefix after 3rd-party packages. # It's a comma-separated list of prefixes. 
local-prefixes: github.com/cloudwego/netpoll issues: exclude-use-default: true ================================================ FILE: .licenserc.yaml ================================================ header: license: spdx-id: Apache-2.0 copyright-owner: CloudWeGo Authors paths: - '**/*.go' - '**/*.s' paths-ignore: - 'net_netfd.go' - 'net_sock.go' - 'net_tcpsock.go' - 'net_unixsock.go' - 'sys_sockopt_bsd.go' - 'sys_sockopt_linux.go' comment: on-failure ================================================ FILE: CODE_OF_CONDUCT.md ================================================ # Contributor Covenant Code of Conduct ## Our Pledge We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
## Our Standards Examples of behavior that contributes to a positive environment for our community include: * Demonstrating empathy and kindness toward other people * Being respectful of differing opinions, viewpoints, and experiences * Giving and gracefully accepting constructive feedback * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience * Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: * The use of sexualized language or imagery, and sexual attention or advances of any kind * Trolling, insulting or derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or email address, without their explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. ## Scope This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 
## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at conduct@cloudwego.io. All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident. ## Enforcement Guidelines Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: ### 1. Correction **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. ### 2. Warning **Community Impact**: A violation through a single incident or series of actions. **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. ### 3. Temporary Ban **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. ### 4. 
Permanent Ban **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. **Consequence**: A permanent ban from any sort of public interaction within the community. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. ================================================ FILE: CONTRIBUTING.md ================================================ # How to Contribute ## Your First Pull Request We use GitHub for our codebase. You can start by reading [How To Pull Request](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests). ## Without Semantic Versioning We keep the stable code in branch `main` like `golang.org/x`. Development is based on branch `develop`. We promise **Forward Compatibility** by adding a new package directory with the suffix `v2/v3` when the code has breaking changes. ## Branch Organization We use [git-flow](https://nvie.com/posts/a-successful-git-branching-model/) as our branch organization, also known as [FDD](https://en.wikipedia.org/wiki/Feature-driven_development) ## Bugs ### 1. How to Find Known Issues We are using [Github Issues](https://github.com/cloudwego/netpoll/issues) for our public bugs. We keep a close eye on this and try to make it clear when we have an internal fix in progress. 
Before filing a new issue, try to make sure your problem doesn’t already exist. ### 2. Reporting New Issues Providing a reduced test case is the recommended way to report issues. It can be placed in: - Directly in the issue - [Golang Playground](https://play.golang.org/) ### 3. Security Bugs Please do not disclose security bugs in public issues. Contact us by [Support Email](mailto:conduct@cloudwego.io) ## How to Get in Touch - [Email](mailto:conduct@cloudwego.io) ## Submit a Pull Request Before you submit your Pull Request (PR) consider the following guidelines: 1. Search [GitHub](https://github.com/cloudwego/netpoll/pulls) for an open or closed PR that relates to your submission. You don't want to duplicate existing efforts. 2. Be sure that an issue describes the problem you're fixing, or documents the design for the feature you'd like to add. Discussing the design upfront helps to ensure that we're ready to accept your work. 3. [Fork](https://docs.github.com/en/github/getting-started-with-github/fork-a-repo) the cloudwego/netpoll repo. 4. In your forked repository, make your changes in a new git branch: ``` git checkout -b my-fix-branch main ``` 5. Create your patch, including appropriate test cases. 6. Follow our [Style Guides](#code-style-guides). 7. Commit your changes using a descriptive commit message that follows [AngularJS Git Commit Message Conventions](https://docs.google.com/document/d/1QrDFcIiPjSLDn3EL15IJygNPiHORgU1_OOAqWjiDU5Y/edit). Adherence to these conventions is necessary because release notes are automatically generated from these messages. 8. Push your branch to GitHub: ``` git push origin my-fix-branch ``` 9. In GitHub, send a pull request to `netpoll:main` ## Contribution Prerequisites - Our development environment keeps up with [Go Official](https://golang.org/project/). - You need to fully check with lint tools before submitting your pull request. 
[gofmt](https://golang.org/pkg/cmd/gofmt/) and [golangci-lint](https://github.com/golangci/golangci-lint) - You are familiar with [Github](https://github.com) - Maybe you need to be familiar with [Actions](https://github.com/features/actions)(our default workflow tool). ## Code Style Guides - [Effective Go](https://golang.org/doc/effective_go) - [Go Code Review Comments](https://github.com/golang/go/wiki/CodeReviewComments) ================================================ FILE: CREDITS ================================================ ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: NOTICE ================================================ CloudWeGO Copyright 2022 CloudWeGO authors. Go Copyright (c) 2009 The Go Authors. ================================================ FILE: README.md ================================================ # CloudWeGo-Netpoll [中文](README_CN.md) [![Release](https://img.shields.io/github/v/release/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/releases) [![WebSite](https://img.shields.io/website?up_message=cloudwego&url=https%3A%2F%2Fwww.cloudwego.io%2F)](https://www.cloudwego.io/) [![License](https://img.shields.io/github/license/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/blob/main/LICENSE) [![Go Report Card](https://goreportcard.com/badge/github.com/cloudwego/netpoll)](https://goreportcard.com/report/github.com/cloudwego/netpoll) [![OpenIssue](https://img.shields.io/github/issues/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues) [![ClosedIssue](https://img.shields.io/github/issues-closed/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues?q=is%3Aissue+is%3Aclosed) ![Stars](https://img.shields.io/github/stars/cloudwego/netpoll) ![Forks](https://img.shields.io/github/forks/cloudwego/netpoll) ## Introduction [Netpoll][Netpoll] is a high-performance non-blocking I/O networking framework, which focused on RPC scenarios, developed by [ByteDance][ByteDance]. 
RPC is usually heavy on processing logic and therefore cannot handle I/O serially. But Go's standard library [net][net] is designed around blocking I/O APIs, so an RPC framework can only follow the One Conn One Goroutine design. This wastes a lot of resources on context switching, due to a large number of goroutines under high concurrency. Besides, [net.Conn][net.Conn] has no API to check whether a connection is alive, so it is difficult to make an efficient connection pool for an RPC framework, because there may be a large number of failed connections in the pool. On the other hand, the open source community currently lacks Go network libraries that focus on RPC scenarios. Similar repositories such as: [evio][evio], [gnet][gnet], etc., all focus on scenarios like [Redis][Redis], [HAProxy][HAProxy]. But now, [Netpoll][Netpoll] was born and solved the above problems. It draws inspiration from the design of [evio][evio] and [netty][netty], has excellent [Performance](#performance), and is more suitable for microservice architecture. Also [Netpoll][Netpoll] provides a number of [Features](#features), and it is recommended to replace [net][net] in some RPC scenarios. We developed the RPC framework [Kitex][Kitex] and HTTP framework [Hertz][Hertz] based on [Netpoll][Netpoll], both with industry-leading performance. [Examples][netpoll-examples] show how to build RPC client and server using [Netpoll][Netpoll]. For more information, please refer to [Reference](#reference). 
## Features * **Already** - [LinkBuffer][LinkBuffer] provides nocopy API for streaming reading and writing - [gopool][gopool] provides high-performance goroutine pool - [mcache][mcache] provides efficient memory reuse - `IsActive` supports checking whether the connection is alive - `Dialer` supports building clients - `EventLoop` supports building a server - TCP, Unix Domain Socket - Linux, macOS (operating system) * **Unsupported** - Windows (operating system) ## Performance Benchmark should meet the requirements of industrial use. In the RPC scenario, concurrency and timeout are necessary support items. We provide the [netpoll-benchmark][netpoll-benchmark] project to track and compare the performance of [Netpoll][Netpoll] and other frameworks under different conditions for reference. More benchmarks reference [kitex-benchmark][kitex-benchmark] and [hertz-benchmark][hertz-benchmark]. ## Reference * [Official Website](https://www.cloudwego.io) * [Getting Started](docs/guide/guide_en.md) * [Design](docs/reference/design_en.md) * [Why DATA RACE](docs/reference/explain.md) [Netpoll]: https://github.com/cloudwego/netpoll [net]: https://github.com/golang/go/tree/master/src/net [net.Conn]: https://github.com/golang/go/blob/master/src/net/net.go [evio]: https://github.com/tidwall/evio [gnet]: https://github.com/panjf2000/gnet [netty]: https://github.com/netty/netty [Kitex]: https://github.com/cloudwego/kitex [Hertz]: https://github.com/cloudwego/hertz [netpoll-benchmark]: https://github.com/cloudwego/netpoll-benchmark [kitex-benchmark]: https://github.com/cloudwego/kitex-benchmark [hertz-benchmark]: https://github.com/cloudwego/hertz-benchmark [netpoll-examples]:https://github.com/cloudwego/netpoll-examples [ByteDance]: https://www.bytedance.com [Redis]: https://redis.io [HAProxy]: http://www.haproxy.org [LinkBuffer]: nocopy_linkbuffer.go [gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool [mcache]: 
https://github.com/bytedance/gopkg/tree/develop/lang/mcache ================================================ FILE: README_CN.md ================================================ # CloudWeGo-Netpoll [English](README.md) [![Release](https://img.shields.io/github/v/release/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/releases) [![WebSite](https://img.shields.io/website?up_message=cloudwego&url=https%3A%2F%2Fwww.cloudwego.io%2F)](https://www.cloudwego.io/) [![License](https://img.shields.io/github/license/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/blob/main/LICENSE) [![Go Report Card](https://goreportcard.com/badge/github.com/cloudwego/netpoll)](https://goreportcard.com/report/github.com/cloudwego/netpoll) [![OpenIssue](https://img.shields.io/github/issues/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues) [![ClosedIssue](https://img.shields.io/github/issues-closed/cloudwego/netpoll)](https://github.com/cloudwego/netpoll/issues?q=is%3Aissue+is%3Aclosed) ![Stars](https://img.shields.io/github/stars/cloudwego/netpoll) ![Forks](https://img.shields.io/github/forks/cloudwego/netpoll) ## 简介 [Netpoll][Netpoll] 是由 [字节跳动][ByteDance] 开发的高性能 NIO(Non-blocking I/O) 网络库,专注于 RPC 场景。 RPC 通常有较重的处理逻辑,因此无法串行处理 I/O。而 Go 的标准库 [net][net] 设计了 BIO(Blocking I/O) 模式的 API,使得 RPC 框架设计上只能为每个连接都分配一个 goroutine。 这在高并发下,会产生大量的 goroutine,大幅增加调度开销。此外,[net.Conn][net.Conn] 没有提供检查连接活性的 API,因此 RPC 框架很难设计出高效的连接池,池中的失效连接无法及时清理。 另一方面,开源社区目前缺少专注于 RPC 方案的 Go 网络库。类似的项目如:[evio][evio] , [gnet][gnet] 等,均面向 [Redis][Redis], [HAProxy][HAProxy] 这样的场景。 因此 [Netpoll][Netpoll] 应运而生,它借鉴了 [evio][evio] 和 [netty][netty] 的优秀设计,具有出色的 [性能](#性能),更适用于微服务架构。 同时,[Netpoll][Netpoll] 还提供了一些 [特性](#特性),推荐在 RPC 设计中替代 [net][net] 。 基于 [Netpoll][Netpoll] 开发的 RPC 框架 [Kitex][Kitex] 和 HTTP 框架 [Hertz][Hertz],性能均业界领先。 [范例][netpoll-examples] 展示了如何使用 [Netpoll][Netpoll] 构建 RPC Client 和 Server。 更多信息请参阅 [文档](#文档)。 ## 特性 * **已经支持** - [LinkBuffer][LinkBuffer] 提供可以流式读写的 nocopy API - [gopool][gopool] 提供高性能的 goroutine 池 
- [mcache][mcache] 提供高效的内存复用 - `IsActive` 支持检查连接是否存活 - `Dialer` 支持构建 client - `EventLoop` 支持构建 server - 支持 TCP,Unix Domain Socket - 支持 Linux,macOS(操作系统) * **不被支持** - Windows(操作系统) ## 性能 性能测试应满足工业级使用要求,在 RPC 场景下,并发请求、等待超时是必要的支持项。 我们提供了 [netpoll-benchmark][netpoll-benchmark] 项目用来长期追踪和比较 [Netpoll][Netpoll] 与其他框架在不同情况下的性能数据以供参考。 更多测试参考 [kitex-benchmark][kitex-benchmark] 和 [hertz-benchmark][hertz-benchmark] ## 参考 * [官方网站](https://www.cloudwego.io) * [使用文档](docs/guide/guide_cn.md) * [设计文档](docs/reference/design_cn.md) * [DATA RACE 说明](docs/reference/explain.md) [Netpoll]: https://github.com/cloudwego/netpoll [net]: https://github.com/golang/go/tree/master/src/net [net.Conn]: https://github.com/golang/go/blob/master/src/net/net.go [evio]: https://github.com/tidwall/evio [gnet]: https://github.com/panjf2000/gnet [netty]: https://github.com/netty/netty [Kitex]: https://github.com/cloudwego/kitex [Hertz]: https://github.com/cloudwego/hertz [netpoll-benchmark]: https://github.com/cloudwego/netpoll-benchmark [kitex-benchmark]: https://github.com/cloudwego/kitex-benchmark [hertz-benchmark]: https://github.com/cloudwego/hertz-benchmark [netpoll-examples]:https://github.com/cloudwego/netpoll-examples [ByteDance]: https://www.bytedance.com [Redis]: https://redis.io [HAProxy]: http://www.haproxy.org [LinkBuffer]: nocopy_linkbuffer.go [gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool [mcache]: https://github.com/bytedance/gopkg/tree/develop/lang/mcache ================================================ FILE: _typos.toml ================================================ # Typo check: https://github.com/crate-ci/typos [files] extend-exclude = ["go.mod", "go.sum"] [default.extend-identifiers] # *sigh* this just isn't worth the cost of fixing nd = "nd" write_datas = "write_datas" ================================================ FILE: connection.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache 
// License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"net"
	"time"
)

// CloseCallback will be called after the connection is closed.
// The returned error is unused and is ignored directly.
type CloseCallback func(connection Connection) error

// Connection supports reading and writing simultaneously,
// but does not support simultaneous reading or writing by multiple goroutines.
// It maintains its own input/output buffer, and provides nocopy API for reading and writing.
type Connection interface {
	// Connection extends net.Conn, just for interface compatibility.
	// It's not recommended to use net.Conn API except for io.Closer.
	net.Conn

	// The recommended API for nocopy reading and writing.
	// Reader will return nocopy buffer data, or an error after the timeout set by SetReadTimeout.
	Reader() Reader
	// Writer will write data to the connection by NIO mode,
	// so it will return an error only when the connection isn't Active.
	Writer() Writer

	// IsActive checks whether the connection is active or not.
	IsActive() bool

	// SetReadTimeout sets the timeout that future Read calls wait for.
	// A zero value for timeout means Reader will not timeout.
	SetReadTimeout(timeout time.Duration) error

	// SetWriteTimeout sets the timeout that future Write calls wait for.
	// A zero value for timeout means Writer will not timeout.
	SetWriteTimeout(timeout time.Duration) error

	// SetIdleTimeout sets the idle timeout of connections by enabling TCP KeepAlive
	// and setting the KeepAlive interval to the given timeout duration.
	// NOTE: Despite its name, this does not track application-level idle time.
	// It configures OS-level TCP KeepAlive to detect dead peers on idle connections.
	// The name is kept for backward compatibility.
	SetIdleTimeout(timeout time.Duration) error

	// SetOnRequest can set or replace the OnRequest method for a connection, but can't be set to nil.
	// Although SetOnRequest avoids data race, it should still be used before transmitting data.
	// Replacing OnRequest while processing data may cause unexpected behavior and results.
	// Generally, the server side should uniformly set the OnRequest method for each connection via NewEventLoop,
	// which is set when the connection is initialized.
	// On the client side, if necessary, make sure that OnRequest is set before sending data.
	SetOnRequest(on OnRequest) error

	// AddCloseCallback adds a hangup callback for a connection, which will be called when the connection is closing.
	// This is very useful for cleaning up idle connections. For instance, you can use callbacks to clean up
	// the local resources bound to the idle connection when it is hung up by the peer, so no extra goroutine
	// is needed to poll the connection status.
	AddCloseCallback(callback CloseCallback) error
}

// Conn extends net.Conn, but supports getting the conn's fd.
type Conn interface {
	net.Conn

	// Fd returns the conn's fd, used by poll.
	Fd() (fd int)
}

// Listener extends net.Listener, but supports getting the listener's fd.
type Listener interface {
	net.Listener

	// Fd returns the listener's fd, used by poll.
	Fd() (fd int)
}

// Dialer extends net.Dialer's API, just for interface compatibility.
// DialConnection is recommended, but of course all functions are practically the same.
// The returned net.Conn can be directly asserted as Connection if error is nil.
type Dialer interface { DialConnection(network, address string, timeout time.Duration) (connection Connection, err error) DialTimeout(network, address string, timeout time.Duration) (conn net.Conn, err error) } ================================================ FILE: connection_errors.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package netpoll import ( "fmt" "net" "syscall" ) // extends syscall.Errno, the range is set to 0x100-0x1FF const ( // The connection closed when in use. ErrConnClosed = syscall.Errno(0x101) // Read I/O buffer timeout, called by Connection.Reader ErrReadTimeout = syscall.Errno(0x102) // Dial timeout ErrDialTimeout = syscall.Errno(0x103) // Calling dialer without timeout. ErrDialNoDeadline = syscall.Errno(0x104) // TODO: no-deadline support in future // The calling function not support. 
ErrUnsupported = syscall.Errno(0x105) // Same as io.EOF ErrEOF = syscall.Errno(0x106) // Write I/O buffer timeout, calling by Connection.Writer ErrWriteTimeout = syscall.Errno(0x107) // Concurrent connection access error ErrConcurrentAccess = syscall.Errno(0x108) ) const ErrnoMask = 0xFF // wrap Errno, implement xerrors.Wrapper func Exception(err error, suffix string) error { no, ok := err.(syscall.Errno) if !ok { if suffix == "" { return err } return fmt.Errorf("%s %s", err.Error(), suffix) } return &exception{no: no, suffix: suffix} } var _ net.Error = (*exception)(nil) type exception struct { no syscall.Errno suffix string } func (e *exception) Error() string { var s string if int(e.no)&0x100 != 0 { s = errnos[int(e.no)&ErrnoMask] } if s == "" { s = e.no.Error() } if e.suffix != "" { s += " " + e.suffix } return s } func (e *exception) Is(target error) bool { if e == target { return true } if e.no == target { return true } // TODO: ErrConnClosed contains ErrEOF if e.no == ErrEOF && target == ErrConnClosed { return true } return e.no.Is(target) } func (e *exception) Unwrap() error { return e.no } func (e *exception) Timeout() bool { switch e.no { case ErrDialTimeout, ErrReadTimeout, ErrWriteTimeout: return true } return e.no.Timeout() } func (e *exception) Temporary() bool { return e.no.Temporary() } // Errors defined in netpoll var errnos = [...]string{ ErrnoMask & ErrConnClosed: "connection has been closed", ErrnoMask & ErrReadTimeout: "connection read timeout", ErrnoMask & ErrDialTimeout: "dial wait timeout", ErrnoMask & ErrDialNoDeadline: "dial no deadline", ErrnoMask & ErrUnsupported: "netpoll does not support", ErrnoMask & ErrEOF: "EOF", ErrnoMask & ErrWriteTimeout: "connection write timeout", ErrnoMask & ErrConcurrentAccess: "concurrent connection access", } ================================================ FILE: connection_errors_test.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache 
// License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"errors"
	"syscall"
	"testing"
)

// TestErrno checks that Exception keeps errors.Is working against the wrapped errno
// and that the message is "<errno text> <suffix>", both for a netpoll-defined errno
// and for a plain syscall errno.
func TestErrno(t *testing.T) {
	var err1 error = Exception(ErrConnClosed, "when next")
	MustTrue(t, errors.Is(err1, ErrConnClosed))
	Equal(t, err1.Error(), "connection has been closed when next")
	t.Logf("error1=%s", err1)

	var err2 error = Exception(syscall.EPIPE, "when flush")
	MustTrue(t, errors.Is(err2, syscall.EPIPE))
	Equal(t, err2.Error(), "broken pipe when flush")
	t.Logf("error2=%s", err2)
}

================================================
FILE: connection_impl.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !windows
// +build !windows

package netpoll

import (
	"sync"
	"sync/atomic"
	"syscall"
	"time"
)

// connState is the connect/disconnect lifecycle state stored in connection.state.
type connState = int32

const (
	connStateNone         = 0
	connStateConnected    = 1
	connStateDisconnected = 2
)

// connection is the implementation of Connection
type connection struct {
	netFD
	onEvent
	locker
	operator      *FDOperator
	readTimeout   time.Duration
	readDeadline  int64 // UnixNano(). it overwrites readTimeout. 0 if not set.
	readTimer     *time.Timer
	readTrigger   chan error
	waitReadSize  int64
	writeTimeout  time.Duration
	writeDeadline int64 // UnixNano(). it overwrites writeTimeout. 0 if not set.
	writeTimer    *time.Timer
	writeTrigger  chan error
	inputBuffer   *LinkBuffer
	outputBuffer  *LinkBuffer
	outputBarrier *barrier
	maxSize       int       // The maximum size of data between two Release().
	bookSize      int       // The size of data that can be read at once.
	state         connState // Connection state should be changed sequentially.
}

// Compile-time checks that *connection satisfies the public interfaces.
var (
	_ Connection = &connection{}
	_ Reader     = &connection{}
	_ Writer     = &connection{}
)

// Reader implements Connection.
// The connection itself is the Reader.
func (c *connection) Reader() Reader {
	return c
}

// Writer implements Connection.
// The connection itself is the Writer.
func (c *connection) Writer() Writer {
	return c
}

// IsActive implements Connection.
// It reports whether nobody has closed the connection yet.
func (c *connection) IsActive() bool {
	return c.isCloseBy(none)
}

// SetIdleTimeout implements Connection.
// A positive timeout is truncated to whole seconds and passed to SetKeepAlive;
// a zero or negative timeout is a no-op.
func (c *connection) SetIdleTimeout(timeout time.Duration) error {
	if timeout > 0 {
		return c.SetKeepAlive(int(timeout.Seconds()))
	}
	return nil
}

// SetReadTimeout implements Connection.
// A negative timeout leaves the current read timeout untouched.
// Any previously set read deadline is cleared in all cases.
func (c *connection) SetReadTimeout(timeout time.Duration) error {
	if timeout >= 0 {
		c.readTimeout = timeout
	}
	c.readDeadline = 0
	return nil
}

// SetWriteTimeout implements Connection.
// A negative timeout leaves the current write timeout untouched.
// Any previously set write deadline is cleared in all cases.
func (c *connection) SetWriteTimeout(timeout time.Duration) error {
	if timeout >= 0 {
		c.writeTimeout = timeout
	}
	c.writeDeadline = 0
	return nil
}

// SetDeadline implements net.Conn.SetDeadline
// A zero t clears both the read and the write deadline.
func (c *connection) SetDeadline(t time.Time) error {
	v := int64(0)
	if !t.IsZero() {
		v = t.UnixNano()
	}
	c.readDeadline = v
	c.writeDeadline = v
	return nil
}

// SetReadDeadline implements net.Conn.SetReadDeadline
// A zero t clears the read deadline.
func (c *connection) SetReadDeadline(t time.Time) error {
	if t.IsZero() {
		c.readDeadline = 0
	} else {
		c.readDeadline = t.UnixNano()
	}
	return nil
}

// SetWriteDeadline implements net.Conn.SetWriteDeadline
// A zero t clears the write deadline.
func (c *connection) SetWriteDeadline(t time.Time) error {
	if t.IsZero() {
		c.writeDeadline = 0
	} else {
		c.writeDeadline = t.UnixNano()
	}
	return nil
}

// ------------------------------------------ implement zero-copy reader ------------------------------------------

// Next implements Connection.
// It waits until n bytes are buffered, then delegates to the input buffer.
func (c *connection) Next(n int) (p []byte, err error) {
	if err = c.waitRead(n); err != nil {
		return p, err
	}
	return c.inputBuffer.Next(n)
}

// Peek implements Connection.
// It waits until n bytes are buffered, then delegates to the input buffer.
func (c *connection) Peek(n int) (buf []byte, err error) {
	if err = c.waitRead(n); err != nil {
		return buf, err
	}
	return c.inputBuffer.Peek(n)
}

// Skip implements Connection.
// It waits until n bytes are buffered, then delegates to the input buffer.
func (c *connection) Skip(n int) (err error) {
	if err = c.waitRead(n); err != nil {
		return err
	}
	return c.inputBuffer.Skip(n)
}

// Release implements Connection.
// When the input buffer is empty and the operator can be acquired, it first updates
// c.maxSize (capped at mallocMax) and resets the buffer tail, then releases the input buffer.
func (c *connection) Release() (err error) {
	// Check inputBuffer length first to reduce contention in mux situation.
	// c.operator.do competes with c.inputs/c.inputAck
	if c.inputBuffer.Len() == 0 && c.operator.do() {
		maxSize := c.inputBuffer.calcMaxSize()
		// Set the maximum value of maxsize equal to mallocMax to prevent GC pressure.
		if maxSize > mallocMax {
			maxSize = mallocMax
		}

		if maxSize > c.maxSize {
			c.maxSize = maxSize
		}
		// Double check length to reset tail node
		if c.inputBuffer.Len() == 0 {
			c.inputBuffer.resetTail(c.maxSize)
		}
		c.operator.done()
	}
	return c.inputBuffer.Release()
}

// Slice implements Connection.
// It waits until n bytes are buffered, then delegates to the input buffer.
func (c *connection) Slice(n int) (r Reader, err error) {
	if err = c.waitRead(n); err != nil {
		return nil, err
	}
	return c.inputBuffer.Slice(n)
}

// Len implements Connection.
// It returns the current length of the input buffer without waiting.
func (c *connection) Len() (length int) {
	return c.inputBuffer.Len()
}

// Until implements Connection.
// It keeps waiting for at least one more byte than already scanned until delim is found,
// then returns everything up to and including delim.
// If waiting fails, it returns whatever is buffered together with the error.
func (c *connection) Until(delim byte) (line []byte, err error) {
	var n, l int
	for {
		if err = c.waitRead(n + 1); err != nil {
			// return all the data in the buffer
			line, _ = c.inputBuffer.Next(c.inputBuffer.Len())
			return
		}

		l = c.inputBuffer.Len()
		i := c.inputBuffer.indexByte(delim, n)
		if i < 0 {
			n = l // skip all exists bytes
			continue
		}
		return c.Next(i + 1)
	}
}

// ReadString implements Connection.
// It waits until n bytes are buffered, then delegates to the input buffer.
func (c *connection) ReadString(n int) (s string, err error) {
	if err = c.waitRead(n); err != nil {
		return s, err
	}
	return c.inputBuffer.ReadString(n)
}

// ReadBinary implements Connection.
// It waits until n bytes are buffered, then delegates to the input buffer.
func (c *connection) ReadBinary(n int) (p []byte, err error) {
	if err = c.waitRead(n); err != nil {
		return p, err
	}
	return c.inputBuffer.ReadBinary(n)
}

// ReadByte implements Connection.
// It waits until one byte is buffered, then delegates to the input buffer.
func (c *connection) ReadByte() (b byte, err error) {
	if err = c.waitRead(1); err != nil {
		return b, err
	}
	return c.inputBuffer.ReadByte()
}

// ------------------------------------------ implement zero-copy writer ------------------------------------------

// Malloc implements Connection.
// It fails with ErrConnClosed if the connection is no longer active.
func (c *connection) Malloc(n int) (buf []byte, err error) {
	if !c.IsActive() {
		return nil, Exception(ErrConnClosed, "when malloc")
	}
	return c.outputBuffer.Malloc(n)
}

// MallocLen implements Connection.
// It reports the malloc length of the output buffer; no activity check is made.
func (c *connection) MallocLen() (length int) {
	length = c.outputBuffer.MallocLen()
	return length
}

// Flush pushes every malloc'ed byte to the peer, so callers must have finished
// filling the allocated slices before calling it.
//
// The output buffer is flushed first; then, if it is not empty, the data is sent
// directly, and whatever cannot be written immediately is handed to the poller
// and sent asynchronously.
//
// It fails with ErrConnClosed when the connection is not active, and with
// ErrConcurrentAccess when another flush/write currently holds the flushing lock.
func (c *connection) Flush() error {
	switch {
	case !c.IsActive():
		return Exception(ErrConnClosed, "when flush")
	case !c.lock(flushing):
		return Exception(ErrConcurrentAccess, "when flush")
	}
	defer c.unlock(flushing)

	c.outputBuffer.Flush()
	return c.flush()
}

// MallocAck implements Connection.
// It fails with ErrConnClosed if the connection is no longer active.
func (c *connection) MallocAck(n int) (err error) {
	if c.IsActive() {
		return c.outputBuffer.MallocAck(n)
	}
	return Exception(ErrConnClosed, "when malloc ack")
}

// Append implements Connection.
// It fails with ErrConnClosed if the connection is no longer active.
func (c *connection) Append(w Writer) (err error) {
	if c.IsActive() {
		return c.outputBuffer.Append(w)
	}
	return Exception(ErrConnClosed, "when append")
}

// WriteString implements Connection.
// It fails with ErrConnClosed if the connection is no longer active.
func (c *connection) WriteString(s string) (n int, err error) {
	if c.IsActive() {
		return c.outputBuffer.WriteString(s)
	}
	return 0, Exception(ErrConnClosed, "when write string")
}

// WriteBinary implements Connection.
// It fails with ErrConnClosed if the connection is no longer active.
func (c *connection) WriteBinary(b []byte) (n int, err error) {
	if c.IsActive() {
		return c.outputBuffer.WriteBinary(b)
	}
	return 0, Exception(ErrConnClosed, "when write binary")
}

// WriteDirect implements Connection.
// It fails with ErrConnClosed if the connection is no longer active.
func (c *connection) WriteDirect(p []byte, remainCap int) (err error) {
	if c.IsActive() {
		return c.outputBuffer.WriteDirect(p, remainCap)
	}
	return Exception(ErrConnClosed, "when write direct")
}

// WriteByte implements Connection.
func (c *connection) WriteByte(b byte) (err error) { if !c.IsActive() { return Exception(ErrConnClosed, "when write byte") } return c.outputBuffer.WriteByte(b) } // ------------------------------------------ implement net.Conn ------------------------------------------ // Read behavior is the same as net.Conn, it will return io.EOF if buffer is empty. func (c *connection) Read(p []byte) (n int, err error) { if len(p) == 0 { return 0, nil } if err = c.waitRead(1); err != nil { return 0, err } return c.inputBuffer.readCopy(p), nil } // Write will Flush soon. func (c *connection) Write(p []byte) (n int, err error) { if !c.IsActive() { return 0, Exception(ErrConnClosed, "when write") } if !c.lock(flushing) { return 0, Exception(ErrConcurrentAccess, "when write") } defer c.unlock(flushing) dst, _ := c.outputBuffer.Malloc(len(p)) n = copy(dst, p) c.outputBuffer.Flush() err = c.flush() return n, err } // Close implements Connection. func (c *connection) Close() error { return c.onClose() } // Detach detaches the connection from poller but doesn't close it. 
// It marks the connection as detaching and then runs the regular close path.
func (c *connection) Detach() error {
	c.detaching = true
	return c.onClose()
}

// ------------------------------------------ private ------------------------------------------

// barrierPool recycles barrier scratch space, each pre-sized to barriercap entries.
var barrierPool = sync.Pool{
	New: func() interface{} {
		return &barrier{
			bs:  make([][]byte, barriercap),
			ivs: make([]syscall.Iovec, barriercap),
		}
	},
}

// init initializes the connection with options
func (c *connection) init(conn Conn, opts *options) (err error) {
	// init buffer, barrier, finalizer
	c.readTrigger = make(chan error, 1)
	c.writeTrigger = make(chan error, 1)
	c.bookSize, c.maxSize = defaultLinkBufferSize, defaultLinkBufferSize
	c.inputBuffer, c.outputBuffer = NewLinkBuffer(defaultLinkBufferSize), NewLinkBuffer()
	c.outputBarrier = barrierPool.Get().(*barrier)
	c.state = connStateNone

	c.initNetFD(conn) // conn must be *netFD{}
	c.initFDOperator()
	c.initFinalizer()

	// The error is not checked here.
	syscall.SetNonblock(c.fd, true)
	// enable TCP_NODELAY by default
	switch c.network {
	case "tcp", "tcp4", "tcp6":
		setTCPNoDelay(c.fd, true)
	}
	// connection initialized and prepare options
	return c.onPrepare(opts)
}

// initNetFD copies conn when it is a *netFD; otherwise it builds a netFD from
// conn's fd and local/remote addresses only.
func (c *connection) initNetFD(conn Conn) {
	if nfd, ok := conn.(*netFD); ok {
		c.netFD = *nfd
		return
	}
	c.netFD = netFD{
		fd:         conn.Fd(),
		localAddr:  conn.LocalAddr(),
		remoteAddr: conn.RemoteAddr(),
	}
}

// initFDOperator allocates an FDOperator from a poll picked by pollmanager and
// wires the connection's hang-up and input/output handlers into it.
func (c *connection) initFDOperator() {
	poll := pollmanager.Pick()
	op := poll.Alloc()
	op.FD = c.fd
	op.OnRead, op.OnWrite, op.OnHup = nil, nil, c.onHup
	op.Inputs, op.InputAck = c.inputs, c.inputAck
	op.Outputs, op.OutputAck = c.outputs, c.outputAck

	c.operator = op
}

// initFinalizer registers the close callback that tears the connection down:
// stop flushing, free the operator, close the fd (failure is only logged) and close the buffers.
func (c *connection) initFinalizer() {
	c.AddCloseCallback(func(connection Connection) (err error) {
		c.stop(flushing)
		c.operator.Free()
		if err = c.netFD.Close(); err != nil {
			logger.Printf("NETPOLL: netFD close failed: %v", err)
		}
		c.closeBuffer()
		return nil
	})
}

// triggerRead hands err to a waiting reader without blocking;
// the signal is dropped if one is already pending.
func (c *connection) triggerRead(err error) {
	select {
	case c.readTrigger <- err:
	default:
	}
}

// triggerWrite hands err to a waiting writer without blocking;
// the signal is dropped if one is already pending.
func (c *connection) triggerWrite(err error) {
	select {
	case c.writeTrigger <- err:
	default:
	}
}

// waitRead
// will wait full n bytes.
//
// It returns immediately when n bytes are already buffered. Otherwise a read deadline,
// if set, takes precedence over the read timeout; with neither set it blocks until
// enough data arrives, the connection is closed, or a read trigger delivers an error.
func (c *connection) waitRead(n int) (err error) {
	if n <= c.inputBuffer.Len() {
		return nil
	}
	// Publish how many bytes this reader is waiting for, and clear it on the way out.
	atomic.StoreInt64(&c.waitReadSize, int64(n))
	defer atomic.StoreInt64(&c.waitReadSize, 0)
	if dl := c.readDeadline; dl > 0 {
		timeout := time.Duration(dl - time.Now().UnixNano())
		if timeout <= 0 {
			// The deadline has already passed.
			return Exception(ErrReadTimeout, c.remoteAddr.String())
		}
		return c.waitReadWithTimeout(n, timeout)
	} else if c.readTimeout > 0 {
		return c.waitReadWithTimeout(n, c.readTimeout)
	}
	// wait full n
	for c.inputBuffer.Len() < n {
		switch c.status(closing) {
		case poller:
			return Exception(ErrEOF, "wait read")
		case user:
			return Exception(ErrConnClosed, "wait read")
		default:
			err = <-c.readTrigger
			if err != nil {
				return err
			}
		}
	}
	return nil
}

// waitReadWithTimeout will wait full n bytes or until timeout.
// The read timer is created lazily and reused across calls.
func (c *connection) waitReadWithTimeout(n int, timeout time.Duration) (err error) {
	if c.readTimer == nil {
		c.readTimer = time.NewTimer(timeout)
	} else {
		c.readTimer.Reset(timeout)
	}

	for c.inputBuffer.Len() < n {
		switch c.status(closing) {
		case poller:
			// cannot return directly, stop timer first!
			err = Exception(ErrEOF, "wait read")
			goto RET
		case user:
			// cannot return directly, stop timer first!
			err = Exception(ErrConnClosed, "wait read")
			goto RET
		default:
			select {
			case <-c.readTimer.C:
				// The timer value has been consumed here, so returning directly is fine.
				// double check if there is enough data to be read
				if c.inputBuffer.Len() >= n {
					return nil
				}
				return Exception(ErrReadTimeout, c.remoteAddr.String())
			case err = <-c.readTrigger:
				if err != nil {
					goto RET
				}
				continue
			}
		}
	}
RET:
	// clean timer.C
	if !c.readTimer.Stop() {
		<-c.readTimer.C
	}
	return err
}

// flush writes data directly.
// It sends the output buffer with a single sendmsg call. If data remains afterwards,
// it registers for write events on the poller and waits in waitFlush.
func (c *connection) flush() error {
	if c.outputBuffer.IsEmpty() {
		return nil
	}
	bs := c.outputBuffer.GetBytes(c.outputBarrier.bs)
	n, err := sendmsg(c.fd, bs, c.outputBarrier.ivs, false)
	// EAGAIN is not treated as a failure; the remaining data is handed to the poller below.
	if err != nil && err != syscall.EAGAIN {
		return Exception(err, "when flush")
	}
	if n > 0 {
		err = c.outputBuffer.Skip(n)
		c.outputBuffer.Release()
		if err != nil {
			return Exception(err, "when flush")
		}
	}
	// return if write all buffer.
	if c.outputBuffer.IsEmpty() {
		return nil
	}
	err = c.operator.Control(PollR2RW)
	if err != nil {
		return Exception(err, "when flush")
	}

	return c.waitFlush()
}

// waitFlush blocks until the write trigger fires or the write timeout/deadline expires.
// A write deadline, if set, takes precedence over the write timeout;
// a zero timeout means wait indefinitely.
func (c *connection) waitFlush() (err error) {
	timeout := c.writeTimeout
	if dl := c.writeDeadline; dl > 0 {
		timeout = time.Duration(dl - time.Now().UnixNano())
		if timeout <= 0 {
			// The deadline has already passed.
			return Exception(ErrWriteTimeout, c.remoteAddr.String())
		}
	}
	if timeout == 0 {
		return <-c.writeTrigger
	}

	// set write timeout
	if c.writeTimer == nil {
		c.writeTimer = time.NewTimer(timeout)
	} else {
		c.writeTimer.Reset(timeout)
	}

	select {
	case err = <-c.writeTrigger:
		if !c.writeTimer.Stop() { // clean timer
			<-c.writeTimer.C
		}
		return err
	case <-c.writeTimer.C:
		select {
		// try fetch writeTrigger if both cases fires
		case err = <-c.writeTrigger:
			return err
		default:
		}
		// if timeout, remove write event from poller
		// we cannot flush it again, since we don't know if the poller is still processing outputBuffer
		c.operator.Control(PollRW2R)
		return Exception(ErrWriteTimeout, c.remoteAddr.String())
	}
}

// getState atomically loads the connection state.
func (c *connection) getState() connState {
	return atomic.LoadInt32(&c.state)
}

// setState atomically stores the connection state unconditionally.
func (c *connection) setState(newState connState) {
	atomic.StoreInt32(&c.state, newState)
}

// changeState atomically moves the state from `from` to `to`,
// reporting whether the swap happened.
func (c *connection) changeState(from, to connState) bool {
	return atomic.CompareAndSwapInt32(&c.state, from, to)
}

================================================
FILE: connection_lock.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in
// compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"runtime"
	"sync/atomic"
)

// who identifies the party that closed a connection; it is the value stored
// in the closing slot of the keychain.
type who = int32

const (
	none who = iota
	user
	poller
)

// key indexes a slot of locker.keychain.
type key int32

/* State Diagram
+--------------+         +--------------+
|  processing  |-------->|   flushing   |
+-------+------+         +-------+------+
        |
        |                +--------------+
        +--------------->|   closing    |
                         +--------------+

- "processing" locks onRequest handler, and doesn't exist in dialer.
- "flushing" locks outputBuffer
- "closing" should wait for flushing finished and call the closeCallback after that.
*/

const (
	closing key = iota
	connecting
	processing
	flushing
	// total must be at the bottom.
	total
)

// locker is a set of small CAS-based locks, one per key.
type locker struct {
	// keychain used for lock/unlock/stop operation by who.
	// 0 means unlock, 1 means locked, 2 means stop.
	keychain [total]int32
}

// closeBy records w as the closer if nobody has closed yet, reporting whether it won.
func (l *locker) closeBy(w who) (success bool) {
	return atomic.CompareAndSwapInt32(&l.keychain[closing], 0, w)
}

// isCloseBy reports whether the closing slot currently holds w.
func (l *locker) isCloseBy(w who) (yes bool) {
	return atomic.LoadInt32(&l.keychain[closing]) == w
}

// status returns the current value of slot k.
func (l *locker) status(k key) int32 {
	return atomic.LoadInt32(&l.keychain[k])
}

// force unconditionally stores v into slot k.
func (l *locker) force(k key, v int32) {
	atomic.StoreInt32(&l.keychain[k], v)
}

// lock tries to move slot k from unlocked (0) to locked (1) without blocking.
func (l *locker) lock(k key) (success bool) {
	return atomic.CompareAndSwapInt32(&l.keychain[k], 0, 1)
}

// unlock unconditionally resets slot k to unlocked (0).
func (l *locker) unlock(k key) {
	atomic.StoreInt32(&l.keychain[k], 0)
}

// stop spins, yielding the processor between attempts, until slot k is in the
// stop state (2), either by moving it there from unlocked or by observing it already there.
func (l *locker) stop(k key) {
	for !atomic.CompareAndSwapInt32(&l.keychain[k], 0, 2) && atomic.LoadInt32(&l.keychain[k]) != 2 {
		runtime.Gosched()
	}
}

// isUnlock reports whether slot k is unlocked (0).
func (l *locker) isUnlock(k key) bool {
	return atomic.LoadInt32(&l.keychain[k]) == 0
}

================================================
FILE: connection_onevent.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"sync/atomic"

	"github.com/cloudwego/netpoll/internal/runner"
)

// ------------------------------------ implement OnPrepare, OnRequest, CloseCallback ------------------------------------

// gracefulExit is implemented by things that can report idleness and be closed.
type gracefulExit interface {
	isIdle() (yes bool)
	Close() (err error)
}

// onEvent is the collection of event processing.
// OnPrepare, OnRequest, CloseCallback share the lock processing,
// which is a CAS lock and can only be cleared by OnRequest.
type onEvent struct {
	ctx                  context.Context
	onConnectCallback    atomic.Value
	onDisconnectCallback atomic.Value
	onRequestCallback    atomic.Value
	closeCallbacks       atomic.Value // value is latest *callbackNode
}

// callbackNode is one entry of a singly linked list of close callbacks,
// linked from the latest node back to the earliest one via pre.
type callbackNode struct {
	fn  CloseCallback
	pre *callbackNode
}

// SetOnConnect set the OnConnect callback.
// A nil callback is ignored.
func (c *connection) SetOnConnect(onConnect OnConnect) error {
	if onConnect != nil {
		c.onConnectCallback.Store(onConnect)
	}
	return nil
}

// SetOnDisconnect set the OnDisconnect callback.
// A nil callback is ignored.
func (c *connection) SetOnDisconnect(onDisconnect OnDisconnect) error {
	if onDisconnect != nil {
		c.onDisconnectCallback.Store(onDisconnect)
	}
	return nil
}

// SetOnRequest initialize ctx when setting OnRequest.
// A nil callback is ignored.
func (c *connection) SetOnRequest(onRequest OnRequest) error {
	if onRequest == nil {
		return nil
	}
	c.onRequestCallback.Store(onRequest)
	// fix: trigger OnRequest if there is already input data.
	if !c.inputBuffer.IsEmpty() {
		c.onRequest()
	}
	return nil
}

// AddCloseCallback adds a CloseCallback to this connection.
// A nil callback is ignored. The new node becomes the head of the list and links
// back to the previous head.
// NOTE(review): the Load followed by Store is not a single atomic step, so this
// presumably expects callers not to add callbacks concurrently — confirm.
func (c *connection) AddCloseCallback(callback CloseCallback) error {
	if callback == nil {
		return nil
	}
	cb := &callbackNode{}
	cb.fn = callback
	if pre := c.closeCallbacks.Load(); pre != nil {
		cb.pre = pre.(*callbackNode)
	}
	c.closeCallbacks.Store(cb)
	return nil
}

// onPrepare supports close connection, but not read/write data.
// connection will be registered by this call after preparing.
func (c *connection) onPrepare(opts *options) (err error) {
	if opts != nil {
		c.SetOnConnect(opts.onConnect)
		c.SetOnDisconnect(opts.onDisconnect)
		c.SetOnRequest(opts.onRequest)
		c.SetReadTimeout(opts.readTimeout)
		c.SetWriteTimeout(opts.writeTimeout)
		c.SetIdleTimeout(opts.idleTimeout)

		// calling prepare first and then register.
		if opts.onPrepare != nil {
			c.ctx = opts.onPrepare(c)
		}
	}

	// Make sure callbacks always receive a non-nil context.
	if c.ctx == nil {
		c.ctx = context.Background()
	}
	// prepare may close the connection.
	if c.IsActive() {
		return c.register()
	}
	return nil
}

// onConnect is responsible for executing onRequest if there is new data coming after onConnect callback finished.
func (c *connection) onConnect() {
	onConnect, _ := c.onConnectCallback.Load().(OnConnect)
	if onConnect == nil {
		// Without an OnConnect callback the connection is considered connected right away.
		c.changeState(connStateNone, connStateConnected)
		return
	}
	if !c.lock(connecting) {
		// it never happens because onDisconnect will not lock connecting if c.connected == 0
		return
	}
	onRequest, _ := c.onRequestCallback.Load().(OnRequest)
	c.onProcess(onConnect, onRequest)
}

// when onDisconnect called, c.IsActive() must return false
func (c *connection) onDisconnect() {
	onDisconnect, _ := c.onDisconnectCallback.Load().(OnDisconnect)
	if onDisconnect == nil {
		return
	}
	onConnect, _ := c.onConnectCallback.Load().(OnConnect)
	if onConnect == nil {
		// no need lock if onConnect is nil
		// it's ok to force set state to disconnected since onConnect is nil
		c.setState(connStateDisconnected)
		onDisconnect(c.ctx, c)
		return
	}
	// check if OnConnect finished when onConnect != nil && onDisconnect != nil
	if c.getState() != connStateNone && c.lock(connecting) { // means OnConnect already finished
		// protect onDisconnect run once
		// if CAS return false, means OnConnect already helps to run onDisconnect
		if c.changeState(connStateConnected, connStateDisconnected) {
			onDisconnect(c.ctx, c)
		}
		c.unlock(connecting)
		return
	}
	// OnConnect is not finished yet, return and let onConnect helps to call onDisconnect
}

// onRequest is responsible for executing the closeCallbacks after the connection has been closed.
func (c *connection) onRequest() (needTrigger bool) {
	// The result tells the caller whether it still has to trigger the read side itself.
	onRequest, ok := c.onRequestCallback.Load().(OnRequest)
	if !ok {
		// no OnRequest callback is stored, so no task will be scheduled from here
		return true
	}
	// wait onConnect finished first
	if c.getState() == connStateNone && c.onConnectCallback.Load() != nil {
		// let onConnect to call onRequest
		// (the bare return yields needTrigger == false)
		return
	}
	processed := c.onProcess(nil, onRequest)
	// if not processed, should trigger read
	return !processed
}

// onProcess is responsible for executing the onConnect/onRequest function serially,
// and make sure the connection has been closed correctly if user call c.Close() in onConnect/onRequest function.
//
// It returns false without scheduling anything when the processing lock cannot be taken
// (a task already exists). Otherwise it builds the task closure, hands it to runner.RunTask
// together with c.ctx and returns true.
func (c *connection) onProcess(onConnect OnConnect, onRequest OnRequest) (processed bool) {
	// task already exists
	if !c.lock(processing) {
		return false
	}

	task := func() {
		// panicked is only reset to false on the normal exits below, so the deferred
		// func can tell a panic in user callbacks apart from a regular return.
		panicked := true
		defer func() {
			if !panicked {
				return
			}
			// cannot use recover() here, since we don't want to break the panic stack
			c.unlock(processing)
			if c.IsActive() {
				c.Close()
			} else {
				c.closeCallback(false, false)
			}
		}()
		// trigger onConnect first
		if onConnect != nil && c.changeState(connStateNone, connStateConnected) {
			// the context returned by OnConnect replaces c.ctx for all later callbacks
			c.ctx = onConnect(c.ctx, c)
			if !c.IsActive() && c.changeState(connStateConnected, connStateDisconnected) {
				// since we hold connecting lock, so we should help to call onDisconnect here
				onDisconnect, _ := c.onDisconnectCallback.Load().(OnDisconnect)
				if onDisconnect != nil {
					onDisconnect(c.ctx, c)
				}
			}
			c.unlock(connecting)
		}
	START:
		// The `onRequest` must be executed at least once if conn have any readable data,
		// which is in order to cover the `send & close by peer` case.
		if onRequest != nil && c.Reader().Len() > 0 {
			_ = onRequest(c.ctx, c)
		}
		// The processing loop must ensure that the connection meets `IsActive`.
		// `onRequest` must either eventually read all the input data or actively Close the connection,
		// otherwise the goroutine will fall into a dead loop.
		var closedBy who
		for {
			closedBy = c.status(closing)
			// close by user or not processable
			if closedBy == user || onRequest == nil || c.Reader().Len() == 0 {
				break
			}
			_ = onRequest(c.ctx, c)
		}
		// handling callback if connection has been closed.
		if closedBy != none {
			// if closed by user when processing, it "may" needs detach
			needDetach := closedBy == user
			// Here is a corner case that operator will be detached twice:
			//   If server closed the connection(client OnHup will detach op first and closeBy=poller),
			//   and then client's OnRequest function also closed the connection(closeBy=user).
			// But operator already prevent that detach twice will not cause any problem
			// NOTE: the processing lock is still held here; closeCallback is called with needLock=false.
			c.closeCallback(false, needDetach)
			panicked = false
			return
		}
		c.unlock(processing)
		// Note: Poller's closeCallback call will try to get processing lock failed but here already near to unlock processing.
		//       So here we need to check connection state again, to avoid connection leak
		// double check close state
		if c.status(closing) != 0 && c.lock(processing) {
			// poller will get the processing lock failed, here help poller do closeCallback
			// fd must already detach by poller
			c.closeCallback(false, false)
			panicked = false
			return
		}
		// double check is processable
		// (data may have arrived between the loop exit and the unlock above; re-take the lock and loop again)
		if onRequest != nil && c.Reader().Len() > 0 && c.lock(processing) {
			goto START
		}
		// task exits
		panicked = false
	} // end of task closure func

	// add new task
	runner.RunTask(c.ctx, task)
	return true
}

// closeCallback .
// It can be confirmed that closeCallback and onRequest will not be executed concurrently.
// If onRequest is still running, it will trigger closeCallback on exit.
//
// needLock: take the processing lock first; when it cannot be taken the function returns nil
// without doing anything else.
// needDetach: issue PollDetach on the operator (only when operator.poll is set); a failure is
// logged, not returned.
// Afterwards every registered close callback is invoked, starting from the latest node and
// following the pre links; their return values are discarded. The result is always nil.
// The processing lock is not released inside this function.
func (c *connection) closeCallback(needLock, needDetach bool) (err error) {
	if needLock && !c.lock(processing) {
		return nil
	}
	if needDetach && c.operator.poll != nil { // If Close is called during OnPrepare, poll is not registered.
		// PollDetach only happen when user call conn.Close() or poller detect error
		if err := c.operator.Control(PollDetach); err != nil {
			logger.Printf("NETPOLL: closeCallback[%v,%v] detach operator failed: %v", needLock, needDetach, err)
		}
	}
	latest := c.closeCallbacks.Load()
	if latest == nil {
		return nil
	}
	for callback := latest.(*callbackNode); callback != nil; callback = callback.pre {
		callback.fn(c)
	}
	return nil
}

// register only use for connection register into poll.
// It issues PollReadable on the operator; on failure the error is logged, the connection
// is closed and an ErrConnClosed exception carrying the original message is returned.
func (c *connection) register() (err error) {
	err = c.operator.Control(PollReadable)
	if err != nil {
		logger.Printf("NETPOLL: connection register failed: %v", err)
		c.Close()
		return Exception(ErrConnClosed, err.Error())
	}
	return nil
}

// isIdle implements gracefulExit.
// A connection is idle when the processing lock is not held and both the input and
// the output buffer are empty.
func (c *connection) isIdle() (yes bool) {
	return c.isUnlock(processing) &&
		c.inputBuffer.IsEmpty() &&
		c.outputBuffer.IsEmpty()
}

================================================
FILE: connection_reactor.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"sync/atomic"
)

// ------------------------------------------ implement FDOperator ------------------------------------------

// onHup means close by poller.
func (c *connection) onHup(p Poll) error { if !c.closeBy(poller) { return nil } c.triggerRead(Exception(ErrEOF, "peer close")) c.triggerWrite(Exception(ErrConnClosed, "peer close")) // call Disconnect callback first c.onDisconnect() // It depends on closing by user if OnConnect and OnRequest is nil, otherwise it needs to be released actively. // It can be confirmed that the OnRequest goroutine has been exited before closeCallback executing, // and it is safe to close the buffer at this time. onConnect := c.onConnectCallback.Load() onRequest := c.onRequestCallback.Load() needCloseByUser := onConnect == nil && onRequest == nil if !needCloseByUser { // already PollDetach when call OnHup c.closeCallback(true, false) } return nil } // onClose means close by user. func (c *connection) onClose() error { // user code close the connection if c.closeBy(user) { c.triggerRead(Exception(ErrConnClosed, "self close")) c.triggerWrite(Exception(ErrConnClosed, "self close")) // Detach from poller when processing finished, otherwise it will cause race c.closeCallback(true, true) return nil } // closed by poller // still need to change closing status to `user` since OnProcess should not be processed again c.force(closing, user) // user code should actively close the connection to recycle resources. // poller already detached operator return c.closeCallback(true, false) } // closeBuffer recycle input & output LinkBuffer. 
func (c *connection) closeBuffer() { onConnect, _ := c.onConnectCallback.Load().(OnConnect) onRequest, _ := c.onRequestCallback.Load().(OnRequest) // if client close the connection, we cannot ensure that the poller is not process the buffer, // so we need to check the buffer length, and if it's an "unclean" close operation, let's give up to reuse the buffer if c.inputBuffer.Len() == 0 || onConnect != nil || onRequest != nil { c.inputBuffer.Close() } if c.outputBuffer.Len() == 0 || onConnect != nil || onRequest != nil { c.outputBuffer.Close() barrierPool.Put(c.outputBarrier) } } // inputs implements FDOperator. func (c *connection) inputs(vs [][]byte) (rs [][]byte) { vs[0] = c.inputBuffer.book(c.bookSize, c.maxSize) return vs[:1] } // inputAck implements FDOperator. func (c *connection) inputAck(n int) (err error) { if n <= 0 { c.inputBuffer.bookAck(0) return nil } // Auto size bookSize. if n == c.bookSize && c.bookSize < mallocMax { c.bookSize <<= 1 } length, _ := c.inputBuffer.bookAck(n) if c.maxSize < length { c.maxSize = length } if c.maxSize > mallocMax { c.maxSize = mallocMax } needTrigger := true if length == n { // first start onRequest needTrigger = c.onRequest() } if needTrigger && length >= int(atomic.LoadInt64(&c.waitReadSize)) { c.triggerRead(nil) } return nil } // outputs implements FDOperator. func (c *connection) outputs(vs [][]byte) (rs [][]byte, _ bool) { if c.outputBuffer.IsEmpty() { c.rw2r() return rs, false } rs = c.outputBuffer.GetBytes(vs) return rs, false } // outputAck implements FDOperator. func (c *connection) outputAck(n int) (err error) { if n > 0 { c.outputBuffer.Skip(n) c.outputBuffer.Release() } if c.outputBuffer.IsEmpty() { c.rw2r() } return nil } // rw2r removed the monitoring of write events. 
func (c *connection) rw2r() {
	// Ask the operator to apply PollRW2R; the returned error is discarded here.
	c.operator.Control(PollRW2R)
	// then fire the write trigger with a nil error
	c.triggerWrite(nil)
}

================================================
FILE: connection_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"errors"
	"fmt"
	"net"
	"os"
	"runtime"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"testing"
	"time"
)

// BenchmarkConnectionIO measures a round trip over an fd pair: wconn writes dataSize
// bytes, rconn's onRequest writes what it read back, and wconn reads the reply.
func BenchmarkConnectionIO(b *testing.B) {
	dataSize := 1024 * 16
	writeBuffer := make([]byte, dataSize)
	rfd, wfd := GetSysFdPairs()
	rconn, wconn := new(connection), new(connection)
	rconn.init(&netFD{fd: rfd}, &options{onRequest: func(ctx context.Context, connection Connection) error {
		read, _ := connection.Reader().Next(dataSize)
		// NOTE(review): this releases wconn's reader, not the callback's own connection — confirm this is intended.
		_ = wconn.Reader().Release()
		_, _ = connection.Writer().WriteBinary(read)
		_ = connection.Writer().Flush()
		return nil
	}})
	wconn.init(&netFD{fd: wfd}, new(options))

	b.ResetTimer()
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		_, _ = wconn.WriteBinary(writeBuffer)
		_ = wconn.Flush()
		_, _ = wconn.Reader().Next(dataSize)
		_ = wconn.Reader().Release()
	}
}

// TestConnectionWrite writes cycle messages of caps bytes through wconn and waits until
// the onRequest callback has counted at least cycle*caps bytes.
func TestConnectionWrite(t *testing.T) {
	cycle, caps := 10000, 256
	msg, buf := make([]byte, caps), make([]byte, caps)
	var wg sync.WaitGroup
	wg.Add(1)
	var count int32
	expect := int32(cycle * caps)
	opts := &options{}
	opts.onRequest = func(ctx context.Context, connection Connection) error {
		n, err := connection.Read(buf)
		MustNil(t, err)
		// wg.Done fires each time the running total is at or above expect
		if atomic.AddInt32(&count, int32(n)) >= expect {
			wg.Done()
		}
		return nil
	}

	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	// both ends are initialised with the same options
	rconn.init(&netFD{fd: r}, opts)
	wconn.init(&netFD{fd: w}, opts)

	for i := 0; i < cycle; i++ {
		n, err := wconn.Write(msg)
		MustNil(t, err)
		Equal(t, n, len(msg))
	}
	wg.Wait()
	Equal(t, atomic.LoadInt32(&count), expect)
	rconn.Close()
}

// TestConnectionLargeWrite sends totalSize bytes in four WriteBinary calls and waits for
// onRequest to read them in one Next call. It is skipped unconditionally.
func TestConnectionLargeWrite(t *testing.T) {
	// ci machine don't have 4GB memory, so skip test
	t.Skipf("skip large write test for ci job")
	totalSize := 1024 * 1024 * 1024 * 4
	var wg sync.WaitGroup
	wg.Add(1)
	opts := &options{}
	opts.onRequest = func(ctx context.Context, connection Connection) error {
		// keep returning until the whole payload is buffered
		if connection.Reader().Len() < totalSize {
			return nil
		}
		_, err := connection.Reader().Next(totalSize)
		MustNil(t, err)
		err = connection.Reader().Release()
		MustNil(t, err)
		wg.Done()
		return nil
	}

	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	rconn.init(&netFD{fd: r}, opts)
	wconn.init(&netFD{fd: w}, opts)

	msg := make([]byte, totalSize/4)
	for i := 0; i < 4; i++ {
		_, err := wconn.Writer().WriteBinary(msg)
		MustNil(t, err)
	}
	wg.Wait()

	rconn.Close()
}

// TestConnectionRead has a goroutine read cycleTime packets of size bytes with
// Reader().Next while the test body writes them through wconn.
func TestConnectionRead(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn, wconn := &connection{}, &connection{}
	err := rconn.init(&netFD{fd: r}, nil)
	MustNil(t, err)
	err = wconn.init(&netFD{fd: w}, nil)
	MustNil(t, err)

	size := 256
	cycleTime := 1000
	msg := make([]byte, size)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		for i := 0; i < cycleTime; i++ {
			buf, err := rconn.Reader().Next(size)
			MustNil(t, err)
			Equal(t, len(buf), size)
			rconn.Reader().Release()
		}
	}()
	for i := 0; i < cycleTime; i++ {
		n, err := wconn.Write(msg)
		MustNil(t, err)
		Equal(t, n, len(msg))
	}
	wg.Wait()
	rconn.Close()
}

// TestConnectionIOReader tests the io.Reader Read method which uses readCopy internally.
// Verifies that Read after Peek preserves exposed buffer until Release.
func TestConnectionIOReader(t *testing.T) { r, w := GetSysFdPairs() rconn := &connection{} rconn.init(&netFD{fd: r}, nil) msg := make([]byte, 64) for i := range msg { msg[i] = byte(i) } var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() // Peek exposes the underlying buffer pk, err := rconn.Peek(16) MustNil(t, err) Equal(t, len(pk), 16) // Read copies without exposing buf := make([]byte, 64) n, err := rconn.Read(buf) MustNil(t, err) Equal(t, n, 64) for i := 0; i < 64; i++ { Equal(t, buf[i], byte(i)) } // Peek data still valid before Release for i := 0; i < 16; i++ { Equal(t, pk[i], byte(i)) } rconn.Release() }() syscall.Write(w, msg) wg.Wait() rconn.Close() syscall.Close(w) } func TestConnectionReadAfterClosed(t *testing.T) { r, w := GetSysFdPairs() rconn := &connection{} rconn.init(&netFD{fd: r}, nil) size := 256 msg := make([]byte, size) var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() buf, err := rconn.Reader().Next(size) MustNil(t, err) Equal(t, len(buf), size) }() time.Sleep(time.Millisecond) syscall.Write(w, msg) syscall.Close(w) wg.Wait() } func TestConnectionWaitReadHalfPacket(t *testing.T) { r, w := GetSysFdPairs() rconn := &connection{} rconn.init(&netFD{fd: r}, nil) size := pagesize * 2 msg := make([]byte, size) // write half packet syscall.Write(w, msg[:size/2]) // wait poller reads buffer for rconn.inputBuffer.Len() <= 0 { runtime.Gosched() } // wait read full packet var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() buf, err := rconn.Reader().Next(size) Equal(t, atomic.LoadInt64(&rconn.waitReadSize), int64(0)) MustNil(t, err) Equal(t, len(buf), size) }() // write left half packet for atomic.LoadInt64(&rconn.waitReadSize) <= 0 { runtime.Gosched() } Equal(t, atomic.LoadInt64(&rconn.waitReadSize), int64(size)) syscall.Write(w, msg[size/2:]) wg.Wait() } func TestReadTimer(t *testing.T) { read := time.NewTimer(time.Second) MustTrue(t, read.Stop()) time.Sleep(time.Millisecond) Equal(t, len(read.C), 0) } func TestReadTrigger(t 
*testing.T) { trigger := make(chan int, 1) select { case trigger <- 0: default: } Equal(t, len(trigger), 1) } func writeAll(fd int, buf []byte) error { for len(buf) > 0 { n, err := syscall.Write(fd, buf) if n < 0 { return err } buf = buf[n:] } return nil } func createTestTCPListener(t *testing.T) net.Listener { ln, err := net.Listen("tcp", "127.0.0.1:0") MustNil(t, err) return ln } // Large packet write test. The socket buffer is 2MB by default, here to verify // whether Connection.Close can be executed normally after socket output buffer is full. func TestLargeBufferWrite(t *testing.T) { ln := createTestTCPListener(t) defer ln.Close() address := ln.Addr().String() ln, err := ConvertListener(ln) MustNil(t, err) trigger := make(chan int) defer close(trigger) go func() { for { conn, err := ln.Accept() if conn == nil && err == nil { continue } trigger <- conn.(*netFD).fd <-trigger err = ln.Close() MustNil(t, err) return } }() conn, err := DialConnection("tcp", address, time.Second) MustNil(t, err) rfd := <-trigger var wg sync.WaitGroup wg.Add(1) bufferSize := 2 * 1024 * 1024 // 2MB round := 128 // start large buffer writing go func() { defer wg.Done() for i := 1; i <= round+1; i++ { _, err := conn.Writer().Malloc(bufferSize) MustNil(t, err) err = conn.Writer().Flush() if i <= round { MustNil(t, err) } } }() // wait socket buffer full time.Sleep(time.Millisecond * 100) buf := make([]byte, 1024) for received := 0; received < round*bufferSize; { n, _ := syscall.Read(rfd, buf) received += n } // close success err = conn.Close() MustNil(t, err) wg.Wait() trigger <- 1 } func TestConnectionTimeout(t *testing.T) { ln, err := net.Listen("tcp", "127.0.0.1:0") MustNil(t, err) defer ln.Close() const ( bufsz = 1 << 20 interval = 10 * time.Millisecond ) calcRate := func(n int32) int32 { v := n / int32(time.Second/interval) if v > bufsz { panic(v) } if v < 1 { return 1 } return v } wn := int32(1) // for each Read, must <= bufsz setServerWriteRate := func(n int32) { 
atomic.StoreInt32(&wn, calcRate(n)) } rn := int32(1) // for each Write, must <= bufsz setServerReadRate := func(n int32) { atomic.StoreInt32(&rn, calcRate(n)) } go func() { for { conn, err := ln.Accept() if err != nil { return } // set small SO_SNDBUF/SO_RCVBUF buffer for better control timeout test tcpconn := conn.(*net.TCPConn) tcpconn.SetReadBuffer(512) tcpconn.SetWriteBuffer(512) go func() { buf := make([]byte, bufsz) for { n := atomic.LoadInt32(&rn) _, err := conn.Read(buf[:int(n)]) if err != nil { conn.Close() return } time.Sleep(interval) } }() go func() { buf := make([]byte, bufsz) for { n := atomic.LoadInt32(&wn) _, err := conn.Write(buf[:int(n)]) if err != nil { conn.Close() return } time.Sleep(interval) } }() } }() newConn := func() Connection { conn, err := DialConnection("tcp", ln.Addr().String(), time.Second) MustNil(t, err) fd := conn.(Conn).Fd() // set small SO_SNDBUF/SO_RCVBUF buffer for better control timeout test err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_SNDBUF, 512) MustNil(t, err) err = syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_RCVBUF, 512) MustNil(t, err) return conn } mallocAndFlush := func(conn Connection, sz int) error { _, err := conn.Writer().Malloc(sz) MustNil(t, err) return conn.Writer().Flush() } t.Run("TestWriteTimeout", func(t *testing.T) { setServerReadRate(10 << 10) // 10KB/s conn := newConn() defer conn.Close() // write 1KB without timeout err := mallocAndFlush(conn, 1<<10) // ~100ms MustNil(t, err) // write 50ms timeout _ = conn.SetWriteTimeout(50 * time.Millisecond) err = mallocAndFlush(conn, 1<<20) MustTrue(t, errors.Is(err, ErrWriteTimeout)) }) t.Run("TestReadTimeout", func(t *testing.T) { setServerWriteRate(10 << 10) // 10KB/s conn := newConn() defer conn.Close() // read 1KB without timeout _, err := conn.Reader().Next(1 << 10) // ~100ms MustNil(t, err) // read 20KB ~ 2s, 50ms timeout _ = conn.SetReadTimeout(50 * time.Millisecond) _, err = conn.Reader().Next(20 << 10) MustTrue(t, 
errors.Is(err, ErrReadTimeout)) }) t.Run("TestWriteDeadline", func(t *testing.T) { setServerReadRate(10 << 10) // 10KB/s conn := newConn() defer conn.Close() // write 1KB without deadline err := conn.SetWriteDeadline(time.Now()) MustNil(t, err) err = conn.SetDeadline(time.Time{}) MustNil(t, err) err = mallocAndFlush(conn, 1<<10) // ~100ms MustNil(t, err) // write with deadline err = conn.SetWriteDeadline(time.Now().Add(50 * time.Millisecond)) MustNil(t, err) t0 := time.Now() err = mallocAndFlush(conn, 1<<20) MustTrue(t, errors.Is(err, ErrWriteTimeout)) MustTrue(t, time.Since(t0)-50*time.Millisecond < 20*time.Millisecond) // write deadline exceeded t1 := time.Now() err = mallocAndFlush(conn, 10<<10) MustTrue(t, errors.Is(err, ErrWriteTimeout)) MustTrue(t, time.Since(t1) < 20*time.Millisecond) }) t.Run("TestReadDeadline", func(t *testing.T) { setServerWriteRate(20 << 10) // 20KB/s conn := newConn() defer conn.Close() // read 1KB without deadline err := conn.SetReadDeadline(time.Now()) MustNil(t, err) err = conn.SetDeadline(time.Time{}) MustNil(t, err) _, err = conn.Reader().Next(1 << 10) MustNil(t, err) // read 100KB with deadline err = conn.SetReadDeadline(time.Now().Add(50 * time.Millisecond)) MustNil(t, err) t0 := time.Now() _, err = conn.Reader().Next(100 << 10) MustTrue(t, errors.Is(err, ErrReadTimeout)) MustTrue(t, time.Since(t0)-50*time.Millisecond < 20*time.Millisecond) // read 10KB, deadline exceeded t1 := time.Now() _, err = conn.Reader().Next(10 << 10) MustTrue(t, errors.Is(err, ErrReadTimeout)) MustTrue(t, time.Since(t1) < 20*time.Millisecond) }) } // TestConnectionLargeMemory is used to verify the memory usage in the large package scenario. 
func TestConnectionLargeMemory(t *testing.T) { var start, end runtime.MemStats runtime.GC() runtime.ReadMemStats(&start) r, w := GetSysFdPairs() rconn := &connection{} rconn.init(&netFD{fd: r}, nil) var wg sync.WaitGroup rn, wn := 1024, 1*1024*1024 wg.Add(1) go func() { defer wg.Done() _, err := rconn.Reader().Next(wn) MustNil(t, err) }() msg := make([]byte, rn) for i := 0; i < wn/rn; i++ { n, err := syscall.Write(w, msg) if err != nil { MustNil(t, err) } Equal(t, n, rn) } runtime.ReadMemStats(&end) alloc := end.TotalAlloc - start.TotalAlloc limit := uint64(4 * 1024 * 1024) Assert(t, alloc <= limit, fmt.Sprintf("alloc[%d] out of memory %d", alloc, limit)) } // TestSetTCPNoDelay is used to verify the connection initialization set the TCP_NODELAY correctly func TestSetTCPNoDelay(t *testing.T) { fd, err := sysSocket(syscall.AF_INET, syscall.SOCK_STREAM, 0) MustNil(t, err) conn := &connection{} conn.init(&netFD{network: "tcp", fd: fd}, nil) n, _ := syscall.GetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY) MustTrue(t, n > 0) err = setTCPNoDelay(fd, false) MustNil(t, err) n, _ = syscall.GetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY) MustTrue(t, n == 0) } func TestConnectionUntil(t *testing.T) { r, w := GetSysFdPairs() rconn, wconn := &connection{}, &connection{} rconn.init(&netFD{fd: r}, nil) wconn.init(&netFD{fd: w}, nil) loopSize := 10000 msg := make([]byte, 1002) msg[500], msg[1001] = '\n', '\n' go func() { for i := 0; i < loopSize; i++ { n, err := wconn.Write(msg) MustNil(t, err) MustTrue(t, n == len(msg)) } wconn.Write(msg[:100]) wconn.Close() }() for i := 0; i < loopSize*2; i++ { buf, err := rconn.Reader().Until('\n') MustNil(t, err) Equal(t, len(buf), 501) rconn.Reader().Release() } buf, err := rconn.Reader().Until('\n') Equal(t, len(buf), 100) Assert(t, errors.Is(err, ErrEOF), err) } func TestBookSizeLargerThanMaxSize(t *testing.T) { r, w := GetSysFdPairs() rconn, wconn := &connection{}, &connection{} err := rconn.init(&netFD{fd: r}, nil) 
MustNil(t, err) err = wconn.init(&netFD{fd: w}, nil) MustNil(t, err) // prepare data maxSize := 1024 * 1024 * 128 origin := make([][]byte, 0) for size := maxSize; size > 0; size = size >> 1 { ch := 'a' + byte(size%26) origin = append(origin, make([]byte, size)) for i := 0; i < size; i++ { origin[len(origin)-1][i] = ch } } // read var wg sync.WaitGroup wg.Add(1) go func() { defer wg.Done() idx := 0 for size := maxSize; size > 0; size = size >> 1 { buf, err := rconn.Reader().Next(size) MustNil(t, err) Equal(t, string(buf), string(origin[idx])) err = rconn.Reader().Release() MustNil(t, err) idx++ } }() // write for i := 0; i < len(origin); i++ { n, err := wconn.Write(origin[i]) MustNil(t, err) Equal(t, n, len(origin[i])) } wg.Wait() rconn.Close() wconn.Close() } func TestConnDetach(t *testing.T) { ln := createTestTCPListener(t) defer ln.Close() address := ln.Addr().String() // accept => read => write var wg sync.WaitGroup go func() { for { conn, err := ln.Accept() if err != nil { return } if conn == nil { continue } wg.Add(1) go func() { defer wg.Done() buf := make([]byte, 1024) // slow read _, err := conn.Read(buf) if err != nil { return } time.Sleep(10 * time.Millisecond) _, err = conn.Write(buf) if err != nil { return } }() } }() // dial => detach => write => read c, err := DialConnection("tcp", address, time.Second) MustNil(t, err) conn := c.(*TCPConnection) err = conn.Detach() MustNil(t, err) f := os.NewFile(uintptr(conn.fd), "netpoll-connection") defer f.Close() gonetconn, err := net.FileConn(f) MustNil(t, err) buf := make([]byte, 1024) _, err = gonetconn.Write(buf) MustNil(t, err) _, err = gonetconn.Read(buf) MustNil(t, err) err = gonetconn.Close() MustNil(t, err) err = ln.Close() MustNil(t, err) err = c.Close() MustNil(t, err) wg.Wait() } func TestParallelShortConnection(t *testing.T) { ln := createTestTCPListener(t) defer ln.Close() address := ln.Addr().String() var received int64 el, err := NewEventLoop(func(ctx context.Context, connection Connection) error 
{ data, err := connection.Reader().Next(connection.Reader().Len()) atomic.AddInt64(&received, int64(len(data))) if err != nil { return err } // t.Logf("conn[%s] received: %d, active: %v", connection.RemoteAddr(), len(data), connection.IsActive()) return nil }) MustNil(t, err) go func() { el.Serve(ln) }() defer el.Shutdown(context.Background()) conns := 100 sizePerConn := 1024 totalSize := conns * sizePerConn var wg sync.WaitGroup for i := 0; i < conns; i++ { wg.Add(1) go func() { defer wg.Done() conn, err := DialConnection("tcp", address, time.Second) MustNil(t, err) n, err := conn.Writer().WriteBinary(make([]byte, sizePerConn)) MustNil(t, err) MustTrue(t, n == sizePerConn) err = conn.Writer().Flush() MustNil(t, err) err = conn.Close() MustNil(t, err) }() } wg.Wait() t0 := time.Now() for atomic.LoadInt64(&received) < int64(totalSize) { time.Sleep(time.Millisecond) if time.Since(t0) > 100*time.Millisecond { // max wait 100ms break } } Equal(t, atomic.LoadInt64(&received), int64(totalSize)) } func TestConnectionServerClose(t *testing.T) { ln := createTestTCPListener(t) defer ln.Close() address := ln.Addr().String() /* Client Server - Client --- connect --> Server - Client <-- [ping] --- Server - Client --- [pong] --> Server - Client <-- close --- Server - Client --- close --> Server */ const PING, PONG = "ping", "pong" var wg sync.WaitGroup el, err := NewEventLoop( func(ctx context.Context, connection Connection) error { t.Logf("server.OnRequest: addr=%s", connection.RemoteAddr()) defer wg.Done() buf, err := connection.Reader().Next(len(PONG)) // pong Equal(t, string(buf), PONG) MustNil(t, err) err = connection.Reader().Release() MustNil(t, err) err = connection.Close() MustNil(t, err) return err }, WithOnConnect(func(ctx context.Context, connection Connection) context.Context { t.Logf("server.OnConnect: addr=%s", connection.RemoteAddr()) defer wg.Done() // check OnPrepare v := ctx.Value("prepare").(string) Equal(t, v, "true") _, err := 
connection.Writer().WriteBinary([]byte(PING)) MustNil(t, err) err = connection.Writer().Flush() MustNil(t, err) connection.AddCloseCallback(func(connection Connection) error { t.Logf("server.CloseCallback: addr=%s", connection.RemoteAddr()) wg.Done() return nil }) return ctx }), WithOnPrepare(func(connection Connection) context.Context { t.Logf("server.OnPrepare: addr=%s", connection.RemoteAddr()) defer wg.Done() //nolint:staticcheck // SA1029 no built-in type string as key return context.WithValue(context.Background(), "prepare", "true") }), ) MustNil(t, err) defer el.Shutdown(context.Background()) go func() { err := el.Serve(ln) if err != nil { t.Logf("service end with error: %v", err) } }() var clientOnRequest OnRequest = func(ctx context.Context, connection Connection) error { t.Logf("client.OnRequest: addr=%s", connection.LocalAddr()) defer wg.Done() buf, err := connection.Reader().Next(len(PING)) MustNil(t, err) Equal(t, string(buf), PING) _, err = connection.Writer().WriteBinary([]byte(PONG)) MustNil(t, err) err = connection.Writer().Flush() MustNil(t, err) _, err = connection.Reader().Next(1) // server will not send any data, just wait for server close MustTrue(t, errors.Is(err, ErrEOF)) // should get EOF when server close return connection.Close() } conns := 10 // server: OnPrepare, OnConnect, OnRequest, CloseCallback // client: OnRequest, CloseCallback wg.Add(conns * 6) for i := 0; i < conns; i++ { go func() { conn, err := DialConnection("tcp", address, time.Second) MustNil(t, err) err = conn.SetOnRequest(clientOnRequest) MustNil(t, err) conn.AddCloseCallback(func(connection Connection) error { t.Logf("client.CloseCallback: addr=%s", connection.LocalAddr()) defer wg.Done() return nil }) }() } wg.Wait() } func TestWriterAfterClose(t *testing.T) { r, w := GetSysFdPairs() rconn, wconn := &connection{}, &connection{} rconn.init(&netFD{fd: r}, nil) wconn.init(&netFD{fd: w}, nil) err := wconn.Close() MustNil(t, err) for wconn.IsActive() { runtime.Gosched() } 
methods := []struct { name string fn func() error }{ {"Malloc", func() error { _, err := wconn.Malloc(1); return err }}, {"MallocAck", func() error { return wconn.MallocAck(0) }}, {"WriteBinary", func() error { _, err := wconn.WriteBinary([]byte("hi")); return err }}, {"WriteString", func() error { _, err := wconn.WriteString("hi"); return err }}, {"WriteByte", func() error { return wconn.WriteByte('a') }}, {"WriteDirect", func() error { return wconn.WriteDirect([]byte("hi"), 0) }}, {"Flush", func() error { return wconn.Flush() }}, } for _, tc := range methods { t.Run(tc.name, func(t *testing.T) { defer func() { if r := recover(); r != nil { t.Fatalf("Writer.%s panicked after Close: %v", tc.name, r) } }() err := tc.fn() Assert(t, err != nil, fmt.Sprintf("Writer.%s should return error after Close", tc.name)) }) } rconn.Close() } func TestConnectionDailTimeoutAndClose(t *testing.T) { ln := createTestTCPListener(t) defer ln.Close() go func() { for { conn, err := ln.Accept() if err != nil { return } time.Sleep(time.Millisecond) conn.Close() } }() var wg sync.WaitGroup for i := 0; i < 100; i++ { wg.Add(1) go func() { defer wg.Done() conn, err := DialConnection("tcp", ln.Addr().String(), time.Millisecond) Assert(t, err == nil || strings.Contains(err.Error(), "i/o timeout"), err) if err == nil { // XXX: conn is always not nil ... conn.Close() } }() } wg.Wait() } ================================================ FILE: docs/guide/guide_cn.md ================================================ # 快速开始 本教程通过一些简单的 [示例][Examples] 帮助您开始使用 [Netpoll][Netpoll],包括如何使用 [Server](#1-使用-sever)、[Client](#2-使用-dialer) 和 [nocopy API](#3-使用-nocopy-api)。 ## 1. 使用 Sever [这里][server-example] 是一个简单的 server 例子,接下来我们会解释它是如何构建的。 ### 1.1 创建 Listener 首先我们需要一个 `Listener`,它可以是 `net.Listener` 或者 `netpoll.Listener`,两者都可以,依据你的代码情况自由选择。 创建 `Listener` 的过程如下: ```go package main import "net" func main() { listener, err := net.Listen(network, address) if err != nil { panic("create net listener failed") } ... 
} ``` 或者 ```go package main import "github.com/cloudwego/netpoll" func main() { listener, err := netpoll.CreateListener(network, address) if err != nil { panic("create netpoll listener failed") } ... } ``` ### 1.2 创建 EventLoop `EventLoop` 是一个事件驱动的调度器,一个真正的 NIO Server,负责连接管理、事件调度等。 参数说明: * `OnRequest` 是用户应该自己实现来处理业务逻辑的接口。 [注释][netpoll.go] 详细描述了它的行为。 * `Option` 用于自定义 `EventLoop` 创建时的配置,下面的例子展示了它的用法。更多详情请参考 [options][netpoll_options.go]。 创建过程如下: ```go package main import ( "time" "github.com/cloudwego/netpoll" ) var eventLoop netpoll.EventLoop func main() { ... eventLoop, _ = netpoll.NewEventLoop( handle, netpoll.WithOnPrepare(prepare), netpoll.WithReadTimeout(time.Second), ) ... } ``` ### 1.3 运行 Server `EventLoop` 通过绑定 `Listener` 来提供服务,如下所示。`Serve` 方法为阻塞式调用,直到发生 `panic` 等错误,或者由用户主动调用 `Shutdown` 时触发退出。 ```go package main import ( "github.com/cloudwego/netpoll" ) var eventLoop netpoll.EventLoop func main() { ... // start listen loop ... eventLoop.Serve(listener) } ``` ### 1.4 关闭 Server `EventLoop` 提供了 `Shutdown` 功能,用于优雅地停止服务器。用法如下: ```go package main import ( "context" "time" "github.com/cloudwego/netpoll" ) var eventLoop netpoll.EventLoop func main() { // stop server ... ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() eventLoop.Shutdown(ctx) } ``` ## 2. 使用 Dialer [Netpoll][Netpoll] 也支持在 Client 端使用,提供了 `Dialer`,类似于 `net.Dialer`。同样的,[这里][client-example] 展示了一个简单的 Client 端示例,接下来我们详细介绍一下: ### 2.1 快速方式 与 [Net][net] 类似,[Netpoll][Netpoll] 提供了几个用于直接建立连接的公共方法,可以直接调用。 如: ```go DialConnection(network, address string, timeout time.Duration) (connection Connection, err error) DialTCP(ctx context.Context, network string, laddr, raddr *TCPAddr) (*TCPConnection, error) DialUnix(network string, laddr, raddr *UnixAddr) (*UnixConnection, error) ``` ### 2.2 创建 Dialer [Netpoll][Netpoll] 还定义了`Dialer` 接口。 用法如下:(通常推荐使用上一节的快速方式) ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { // Dial a connection with Dialer. 
dialer := netpoll.NewDialer() conn, err := dialer.DialConnection(network, address, timeout) if err != nil { panic("dial netpoll connection failed") } ... } ``` ## 3. 使用 Nocopy API `Connection` 提供了 Nocopy API —— `Reader` 和 `Writer`,以避免频繁复制。下面介绍一下它们的简单用法。 ```go package main type Connection interface { // Recommended nocopy APIs Reader() Reader Writer() Writer ... // see code comments for more details } ``` ### 3.1 简单用法 Nocopy API 设计为两步操作。 使用 `Reader` 时,通过 `Next`、`Peek`、`ReadString` 等方法读取数据后,还需要主动调用 `Release` 方法释放 buffer(`Nocopy` 读取 buffer 的原地址,所以您必须主动再次确认 buffer 已经不再使用)。 同样,使用 `Writer` 时,首先需要分配一个 `[]byte` 来写入数据,然后调用 `Flush` 确认所有数据都已经写入。`Writer` 还提供了丰富的 API 来分配 buffer,例如 `Malloc`、`WriteString` 等。 下面是一些简单的读写数据的例子。 更多详情请参考 [说明][nocopy.go]。 ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection var reader, writer = conn.Reader(), conn.Writer() // reading buf, _ := reader.Next(n) ... parse the read data ... reader.Release() // writing var write_data []byte ... make the write data ... alloc, _ := writer.Malloc(len(write_data)) copy(alloc, write_data) // write data writer.Flush() } ``` ### 3.2 高阶用法 如果你想使用单个连接来发送(或接收)多组数据(如连接多路复用),那么你将面临数据打包和分包。在 [net][net] 上,这种工作一般都是通过复制来完成的。一个例子如下: ```go package main import ( "net" ) func main() { var conn net.Conn var buf = make([]byte, 8192) // reading for { n, _ := conn.Read(buf) ... unpacking & handling ... var i int for i = 0; i <= n-pkgsize; i += pkgsize { pkg := append([]byte{}, buf[i:i+pkgsize]...) go func() { ... handling pkg ... } } buf = append(buf[:0], buf[i:n]...) } // writing var write_datas <-chan []byte ... packing write ... for { pkg := <-write_datas conn.Write(pkg) } } ``` 但是,[Netpoll][Netpoll] 不需要这样做,nocopy APIs 支持对 buffer 进行原地址操作(原地址组包和分包),并通过引用计数实现资源的自动回收和重用。 示例如下(使用方法 `Reader.Slice` 和 `Writer.Append`): ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // reading reader := conn.Reader() for { ... 
unpacking & handling ... pkg, _ := reader.Slice(pkgsize) go func() { ... handling pkg ... pkg.Release() } } // writing var write_datas <-chan netpoll.Writer ... packing write ... writer := conn.Writer() for { select { case pkg := <-write_datas: writer.Append(pkg) default: if writer.MallocLen() > 0 { writer.Flush() } } } } ``` # 常见用法 ## 1. 如何配置 poller 的数量 ? `NumLoops` 表示 [Netpoll][Netpoll] 创建的 `epoll` 的数量,默认已经根据P的数量自动调整(`runtime.GOMAXPROCS(0)`),用户一般不需要关心。 但是如果你的服务有大量的 I/O,你可能需要如下配置: ```go package main import ( "runtime" "github.com/cloudwego/netpoll" ) func init() { netpoll.SetNumLoops(runtime.GOMAXPROCS(0)) } ``` ## 2. 如何配置 poller 的连接负载均衡 ? 当 [Netpoll][Netpoll] 中有多个 poller 时,服务进程中的连接会负载均衡到每个 poller。 现在支持以下策略: 1. Random * 新连接将分配给随机选择的轮询器。 2. RoundRobin * 新连接将按顺序分配给轮询器。 [Netpoll][Netpoll] 默认使用 `RoundRobin`,用户可以通过以下方式更改: ```go package main import ( "github.com/cloudwego/netpoll" ) func init() { netpoll.SetLoadBalance(netpoll.Random) // or netpoll.SetLoadBalance(netpoll.RoundRobin) } ``` ## 3. 如何配置 [gopool][gopool] ? [Netpoll][Netpoll] 默认使用 [gopool][gopool] 作为 goroutine 池来优化 `栈扩张` 问题(RPC 服务常见问题)。 [gopool][gopool] 项目中已经详细解释了如何自定义配置,这里不再赘述。 当然,如果你的项目没有 `栈扩张` 问题,建议最好关闭 [gopool][gopool],关闭方式如下: ```go package main import ( "github.com/cloudwego/netpoll" ) func init() { netpoll.DisableGopool() } ``` ## 4. 如何初始化新的连接 ? Client 和 Server 端通过不同的方式初始化新连接。 1. 在 Server 端,定义了 `OnPrepare` 来初始化新链接,同时支持返回一个 `context`,可以传递给后续的业务处理并复用。`WithOnPrepare` 提供方法注册。当 Server 接收新连接时,会自动执行注册的 `OnPrepare` 方法来完成准备工作。示例如下: ```go package main import ( "context" "github.com/cloudwego/netpoll" ) func main() { // register OnPrepare var onPrepare netpoll.OnPrepare = prepare evl, _ := netpoll.NewEventLoop(handler, netpoll.WithOnPrepare(onPrepare)) ... } func prepare(connection netpoll.Connection) (ctx context.Context) { ... prepare connection ... return } ``` 2. 
在 Client 端,连接初始化需要由用户自行完成。 一般来说,`Dialer` 创建的新连接是可以由用户自行控制的,这与 Server 端被动接收连接不同。因此,用户不需要依赖触发器,可以自行初始化,如下所示: ```go package main import ( "context" "github.com/cloudwego/netpoll" ) func main() { conn, err := netpoll.DialConnection(network, address, timeout) if err != nil { panic("dial netpoll connection failed") } ... prepare here directly ... prepare(conn) ... } func prepare(connection netpoll.Connection) (ctx context.Context) { ... prepare connection ... return } ``` ## 5. 如何配置连接超时 ? [Netpoll][Netpoll] 现在支持两种类型的超时配置: 1. 读超时(`ReadTimeout`) * 为了保持与 `net.Conn` 相同的操作风格,`Connection.Reader` 也被设计为阻塞读取。 所以提供了读取超时(`ReadTimeout`)。 * 读超时(`ReadTimeout`)没有默认值(默认无限等待),可以通过 `Connection` 或 `EventLoop.Option` 进行配置,例如: ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // 1. setting by Connection conn.SetReadTimeout(timeout) // or // 2. setting with Option netpoll.NewEventLoop(handler, netpoll.WithReadTimeout(timeout)) ... } ``` 2. 空闲超时(`IdleTimeout`) * 空闲超时(`IdleTimeout`)利用 `TCP KeepAlive` 机制来踢出死连接并减少维护开销。使用 [Netpoll][Netpoll] 时,一般不需要频繁创建和关闭连接,所以通常来说,空闲连接影响不大。当连接长时间处于非活动状态时,为了防止出现假死、对端挂起、异常断开等造成的死连接,在空闲超时(`IdleTimeout`)后,netpoll 会主动关闭连接。 * 空闲超时(`IdleTimeout`)的默认配置为 `10min`,可以通过 `Connection` API 或 `EventLoop.Option` 进行配置,例如: ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // 1. setting by Connection conn.SetIdleTimeout(timeout) // or // 2. setting with Option netpoll.NewEventLoop(handler, netpoll.WithIdleTimeout(timeout)) ... } ``` ## 6. 如何配置连接的读事件回调 ? `OnRequest` 是指连接上发生读事件时 [Netpoll][Netpoll] 触发的回调。在 Server 端,在创建 `EventLoop` 时,可以注册一个`OnRequest`,在每次连接数据到达时触发,进行业务处理。Client端默认没有 `OnRequest`,需要时可以通过 API 设置。例如: ```go package main import ( "context" "github.com/cloudwego/netpoll" ) func main() { var onRequest netpoll.OnRequest = handler // 1. on server side evl, _ := netpoll.NewEventLoop(onRequest, opts...) ... // 2. 
on client side conn, _ := netpoll.DialConnection(network, address, timeout) conn.SetOnRequest(handler) ... } func handler(ctx context.Context, connection netpoll.Connection) (err error) { ... handling ... return nil } ``` ## 7. 如何配置连接的关闭回调 ? `CloseCallback` 是指连接关闭时 [Netpoll][Netpoll] 触发的回调,用于在连接关闭后进行额外的处理。 [Netpoll][Netpoll] 能够感知连接状态。当连接被对端关闭或被自己清理时,会主动触发 `CloseCallback`,而不是由下一次调用 `Read` 或 `Write` 时返回错误(`net.Conn` 的方式)。 `Connection` 提供了添加 `CloseCallback` 的 API,已经添加的回调无法删除,支持多个回调。 ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // add close callback var cb netpoll.CloseCallback = callback conn.AddCloseCallback(cb) ... } func callback(connection netpoll.Connection) error { return nil } ``` # 注意事项 ## 1. 错误设置 NumLoops 如果你的服务器运行在物理机上,Go 进程创建的 P 个数就等于机器的 CPU 核心数。 但是 Server 可能不会使用这么多核心。在这种情况下,过多的 poller 会导致性能下降。 这里提供了以下几种解决方案: 1. 使用 `taskset` 命令来限制 CPU 个数,例如: ```shell taskset -c 0-3 $run_your_server ``` 2. 主动设置 P 的个数,例如: ```go package main import ( "runtime" ) func init() { runtime.GOMAXPROCS(num_you_want) } ``` 3. 
主动设置 poller 的个数,例如: ```go package main import ( "github.com/cloudwego/netpoll" ) func init() { netpoll.SetNumLoops(num_you_want) } ``` [Netpoll]: https://github.com/cloudwego/netpoll [net]: https://github.com/golang/go/tree/master/src/net [gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool [Examples]: https://github.com/cloudwego/netpoll-examples [server-example]: https://github.com/cloudwego/netpoll-examples/blob/main/server.go [client-example]: https://github.com/cloudwego/netpoll-examples/blob/main/client.go [netpoll.go]: https://github.com/cloudwego/netpoll/blob/main/eventloop.go [netpoll_options.go]: https://github.com/cloudwego/netpoll/blob/main/netpoll_options.go [nocopy.go]: https://github.com/cloudwego/netpoll/blob/main/nocopy.go ================================================ FILE: docs/guide/guide_en.md ================================================ # Tutorial This tutorial gets you started with [Netpoll][Netpoll] through some simple [examples][Examples], including how to use [Server](#1-use-server), [Client](#2-use-dialer) and [nocopy APIs](#3-use-nocopy-api). ## 1. Use Server [Here][server-example] is a simple server demo, we will explain how it is constructed next. ### 1.1 Create Listener First we need to get a `Listener`, it can be `net.Listener` or `netpoll.Listener`, which makes no difference for server usage. Create a `Listener` as shown below: ```go package main import "net" func main() { listener, err := net.Listen(network, address) if err != nil { panic("create net listener failed") } ... } ``` or ```go package main import "github.com/cloudwego/netpoll" func main() { listener, err := netpoll.CreateListener(network, address) if err != nil { panic("create netpoll listener failed") } ... } ``` ### 1.2 New EventLoop `EventLoop` is an event-driven scheduler, a real NIO Server, responsible for connection management, event scheduling, etc.
Parameters: * `OnRequest` is an interface that users should implement by themselves to process business logic. [Code Comment][netpoll.go] describes its behavior in detail. * `Option` is used to customize the configuration when creating `EventLoop`, and the following example shows its usage. For more details, please refer to [options][netpoll_options.go]. The creation process is as follows: ```go package main import ( "time" "github.com/cloudwego/netpoll" ) var eventLoop netpoll.EventLoop func main() { ... eventLoop, _ = netpoll.NewEventLoop( handle, netpoll.WithOnPrepare(prepare), netpoll.WithReadTimeout(time.Second), ) ... } ``` ### 1.3 Run Server `EventLoop` provides services by binding `Listener`, as shown below. The `Serve` function blocks until an error such as a panic occurs, or until the user actively calls `Shutdown`. ```go package main import ( "github.com/cloudwego/netpoll" ) var eventLoop netpoll.EventLoop func main() { ... // start listen loop ... eventLoop.Serve(listener) } ``` ### 1.4 Shutdown Server `EventLoop` provides the `Shutdown` function, which is used to stop the server gracefully. The usage is as follows. ```go package main import ( "context" "time" "github.com/cloudwego/netpoll" ) var eventLoop netpoll.EventLoop func main() { // stop server ... ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() eventLoop.Shutdown(ctx) } ``` ## 2. Use Dialer [Netpoll][Netpoll] also has the ability to be used on the Client side. It provides `Dialer`, similar to `net.Dialer`. Again, [here][client-example] is a simple client demo, and then we introduce it in detail. ### 2.1 The Fast Way Similar to [Net][net], [Netpoll][Netpoll] provides several public functions for directly dialing a connection.
such as: ```go DialConnection(network, address string, timeout time.Duration) (connection Connection, err error) DialTCP(ctx context.Context, network string, laddr, raddr *TCPAddr) (*TCPConnection, error) DialUnix(network string, laddr, raddr *UnixAddr) (*UnixConnection, error) ``` ### 2.2 Create Dialer [Netpoll][Netpoll] also defines the `Dialer` interface. The usage is as follows: (of course, you can usually use the fast way) ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { // Dial a connection with Dialer. dialer := netpoll.NewDialer() conn, err := dialer.DialConnection(network, address, timeout) if err != nil { panic("dial netpoll connection failed") } ... } ``` ## 3. Use Nocopy API `Connection` provides Nocopy APIs - `Reader` and `Writer`, to avoid frequent copying. Let’s introduce their simple usage. ```go package main type Connection interface { // Recommended nocopy APIs Reader() Reader Writer() Writer ... // see code comments for more details } ``` ### 3.1 Simple Usage The Nocopy APIs are designed as a two-step operation. On `Reader`, after reading data through `Next`, `Peek`, `ReadString`, etc., you still have to actively call `Release` to release the buffer (`Nocopy` reads the original address of the buffer, so you must take the initiative to confirm that the buffer is no longer used). Similarly, on `Writer`, you first need to allocate a buffer to write data, and then call `Flush` to confirm that all data has been written. `Writer` also provides rich APIs to allocate buffers, such as `Malloc`, `WriteString` and so on. The following shows some simple examples of reading and writing data. For more details, please refer to the [code comments][nocopy.go]. ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection var reader, writer = conn.Reader(), conn.Writer() // reading buf, _ := reader.Next(n) ... parse the read data ... reader.Release() // writing var write_data []byte ...
make the write data ... alloc, _ := writer.Malloc(len(write_data)) copy(alloc, write_data) // write data writer.Flush() } ``` ### 3.2 Advanced Usage If you want to use the connection to send (or receive) multiple sets of data, then you will face the work of packing and unpacking the data. On [net][net], this kind of work is generally done by copying. An example is as follows: ```go package main import ( "net" ) func main() { var conn net.Conn var buf = make([]byte, 8192) // reading for { n, _ := conn.Read(buf) ... unpacking & handling ... var i int for i = 0; i <= n-pkgsize; i += pkgsize { pkg := append([]byte{}, buf[i:i+pkgsize]...) go func() { ... handling pkg ... } } buf = append(buf[:0], buf[i:n]...) } // writing var write_datas <-chan []byte ... packing write ... for { pkg := <-write_datas conn.Write(pkg) } } ``` However, this is not necessary in [Netpoll][Netpoll]: the nocopy APIs support operations on the original address of the buffer, and realize automatic recycling and reuse of resources through reference counting. Examples are as follows (using `Reader.Slice` and `Writer.Append`): ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // reading reader := conn.Reader() for { ... unpacking & handling ... pkg, _ := reader.Slice(pkgsize) go func() { ... handling pkg ... pkg.Release() } } // writing var write_datas <-chan netpoll.Writer ... packing write ... writer := conn.Writer() for { select { case pkg := <-write_datas: writer.Append(pkg) default: if writer.MallocLen() > 0 { writer.Flush() } } } } ``` # How To ## 1. How to configure the number of pollers ? `NumLoops` represents the number of `epoll` created by [Netpoll][Netpoll], which has been automatically adjusted according to the number of P (`runtime.GOMAXPROCS(0)`) by default, and users generally don't need to care.
But if your service has heavy I/O, you may need the following configuration: ```go package main import ( "runtime" "github.com/cloudwego/netpoll" ) func init() { netpoll.SetNumLoops(runtime.GOMAXPROCS(0)) } ``` ## 2. How to configure poller's connection loadbalance ? When there are multiple pollers in [Netpoll][Netpoll], the connections in the service process will be loadbalanced to each poller. The following strategies are supported now: 1. Random * The new connection will be assigned to a randomly picked poller. 2. RoundRobin * The new connection will be assigned to the pollers in order. [Netpoll][Netpoll] uses `RoundRobin` by default, and users can change it in the following ways: ```go package main import ( "github.com/cloudwego/netpoll" ) func init() { netpoll.SetLoadBalance(netpoll.Random) // or netpoll.SetLoadBalance(netpoll.RoundRobin) } ``` ## 3. How to configure [gopool][gopool] ? [Netpoll][Netpoll] uses [gopool][gopool] as the goroutine pool by default to optimize the `stack growth` problem that generally occurs in RPC services. The [gopool][gopool] project explains how to change its configuration, so we won't repeat it here. Of course, if your project does not have a `stack growth` problem, it is best to disable [gopool][gopool] as follows: ```go package main import ( "github.com/cloudwego/netpoll" ) func init() { netpoll.DisableGopool() } ``` ## 4. How to prepare a new connection ? There are different ways to prepare a new connection on the client and server. 1. On the server side, `OnPrepare` is defined to prepare for the new connection, and it also supports returning a `context`, which can be reused in subsequent business processing. `WithOnPrepare` provides this registration. When the server accepts a new connection, it will automatically execute the registered `OnPrepare` function to complete the preparation work.
The example is as follows: ```go package main import ( "context" "github.com/cloudwego/netpoll" ) func main() { // register OnPrepare var onPrepare netpoll.OnPrepare = prepare evl, _ := netpoll.NewEventLoop(handler, netpoll.WithOnPrepare(onPrepare)) ... } func prepare(connection netpoll.Connection) (ctx context.Context) { ... prepare connection ... return } ``` 2. On the client side, the connection preparation needs to be completed by the user. Generally speaking, the connection created by `Dialer` can be controlled by the user, which is different from passively accepting the connection on the server side. Therefore, the user does not need to rely on a trigger and can prepare the new connection directly, like this: ```go package main import ( "context" "github.com/cloudwego/netpoll" ) func main() { conn, err := netpoll.DialConnection(network, address, timeout) if err != nil { panic("dial netpoll connection failed") } ... prepare here directly ... prepare(conn) ... } func prepare(connection netpoll.Connection) (ctx context.Context) { ... prepare connection ... return } ``` ## 5. How to configure connection timeout ? [Netpoll][Netpoll] now supports two timeout configurations: 1. `Read Timeout` * In order to maintain the same operating style as `net.Conn`, `Connection.Reader` is also designed as a blocking read, so `Read Timeout` is provided. * `Read Timeout` has no default value (it waits indefinitely); it can be configured via `Connection` or `EventLoop.Option`, for example: ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // 1. setting by Connection conn.SetReadTimeout(timeout) // or // 2. setting with Option netpoll.NewEventLoop(handler, netpoll.WithReadTimeout(timeout)) ... } ``` 2. `Idle Timeout` * `Idle Timeout` utilizes the `TCP KeepAlive` mechanism to kick out dead connections and reduce maintenance overhead.
When using [Netpoll][Netpoll], there is generally no need to create and close connections frequently, and idle connections have little effect. When the connection is inactive for a long time, in order to prevent dead connections caused by suspended animation, a hung peer, abnormal disconnection, etc., the connection will be actively closed after the `Idle Timeout`. * The default value of `Idle Timeout` is `10min`, which can be configured through `Connection` API or `EventLoop.Option`, for example: ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // 1. setting by Connection conn.SetIdleTimeout(timeout) // or // 2. setting with Option netpoll.NewEventLoop(handler, netpoll.WithIdleTimeout(timeout)) ... } ``` ## 6. How to configure connection read event callback ? `OnRequest` refers to the callback triggered by [Netpoll][Netpoll] when a read event occurs on the connection. On the Server side, when creating the `EventLoop`, you can register an `OnRequest`, which is triggered whenever data arrives on a connection, to perform business processing. On the Client side, there is no `OnRequest` by default, and it can be set via API when needed. For example: ```go package main import ( "context" "github.com/cloudwego/netpoll" ) func main() { var onRequest netpoll.OnRequest = handler // 1. on server side evl, _ := netpoll.NewEventLoop(onRequest, opts...) ... // 2. on client side conn, _ := netpoll.DialConnection(network, address, timeout) conn.SetOnRequest(handler) ... } func handler(ctx context.Context, connection netpoll.Connection) (err error) { ... handling ... return nil } ``` ## 7. How to configure the connection close callback ? `CloseCallback` refers to the callback triggered by [Netpoll][Netpoll] when the connection is closed, which is used to perform additional processing after the connection is closed. [Netpoll][Netpoll] is able to perceive the connection status.
When the connection is closed by peer or cleaned up by self, it will actively trigger `CloseCallback` instead of returning an error on the next `Read` or `Write`(the way of `net.Conn`). `Connection` provides API for adding `CloseCallback`, callbacks that have been added cannot be removed, and multiple callbacks are supported. ```go package main import ( "github.com/cloudwego/netpoll" ) func main() { var conn netpoll.Connection // add close callback var cb netpoll.CloseCallback = callback conn.AddCloseCallback(cb) ... } func callback(connection netpoll.Connection) error { return nil } ``` # Attention ## 1. Wrong setting of NumLoops If your server is running on a physical machine, the number of P created by the Go process is equal to the number of CPUs of the machine. But the server may not use so many cores. In this case, too many pollers will cause performance degradation. There are several solutions: 1. Use the `taskset` command to limit CPU usage, such as: ```shell taskset -c 0-3 $run_your_server ``` 2. Actively set the number of P, for instance: ```go package main import ( "runtime" ) func init() { runtime.GOMAXPROCS(num_you_want) } ``` 3. 
Actively set the number of pollers, e.g.: ```go package main import ( "github.com/cloudwego/netpoll" ) func init() { netpoll.SetNumLoops(num_you_want) } ``` [Netpoll]: https://github.com/cloudwego/netpoll [net]: https://github.com/golang/go/tree/master/src/net [gopool]: https://github.com/bytedance/gopkg/tree/develop/util/gopool [Examples]: https://github.com/cloudwego/netpoll-examples [server-example]: https://github.com/cloudwego/netpoll-examples/blob/main/server.go [client-example]: https://github.com/cloudwego/netpoll-examples/blob/main/client.go [netpoll.go]: https://github.com/cloudwego/netpoll/blob/main/eventloop.go [netpoll_options.go]: https://github.com/cloudwego/netpoll/blob/main/netpoll_options.go [nocopy.go]: https://github.com/cloudwego/netpoll/blob/main/nocopy.go ================================================ FILE: docs/reference/design_cn.md ================================================ # TODO ================================================ FILE: docs/reference/design_en.md ================================================ # TODO ================================================ FILE: docs/reference/explain.md ================================================ # DATA RACE EXPLAIN `Netpoll` declares separate files with `//+build !race` and `//+build race` to avoid `DATA RACE` detection in some code. The reason is that the `epoll` uses `unsafe.Pointer` to access the struct pointer, in order to improve performance. This operation is beyond the detection range of the `race detector`, so it is mistaken for a data race, although it is not actually a bug in the code. ================================================ FILE: eventloop.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"context"
	"net"
)

// An EventLoop is a network server.
type EventLoop interface {
	// Serve registers a listener and blocks while providing services, including listening to ports,
	// accepting connections and processing transmitted data. When an exception occurs or Shutdown is invoked,
	// Serve will return an error which describes the specific reason.
	Serve(ln net.Listener) error

	// Shutdown is used to exit gracefully.
	// It will close all idle connections on the server, but will not change the underlying pollers.
	//
	// Argument: ctx sets the waiting deadline, after which an error will be returned,
	// but it will not force the closing of connections in progress.
	Shutdown(ctx context.Context) error
}

/* The Connection Callback Sequence Diagram
| Connection State               | Callback Function | Notes
| Connected but not initialized  | OnPrepare         | Conn is not registered into poller
| Connected and initialized      | OnConnect         | Conn is ready for read or write
| Read first byte                | OnRequest         | Conn is ready for read or write
| Peer closed but conn is active | OnDisconnect      | Conn access will race with OnRequest function
| Self closed and conn is closed | CloseCallback     | Conn is destroyed

Execution Order:
  OnPrepare => OnConnect => OnRequest => CloseCallback
                            OnDisconnect
Note: only OnRequest and OnDisconnect will be executed in parallel
*/

// OnPrepare is used to inject custom preparation at connection initialization,
// which is optional but important in some scenarios. For example, a QPS limiter
// can be implemented by closing overloaded connections directly in OnPrepare.
//
// Return:
// The returned context will become the argument of OnRequest.
// Usually, custom resources can be initialized in OnPrepare and used in OnRequest.
//
// PLEASE NOTE:
// OnPrepare is executed without any data in the connection,
// so Reader() or Writer() cannot be used here, but may be supported in the future.
type OnPrepare func(connection Connection) context.Context

// OnConnect is called once the connection has been created.
// It supports reading from, writing to and closing the connection, and may return a ctx which will be passed to OnRequest.
// OnConnect will not block the poller since it's executed asynchronously.
// OnRequest can only be executed after OnConnect has finished.
//
// An example usage in TCP Proxy scenario:
//
//	func onConnect(ctx context.Context, upstream netpoll.Connection) context.Context {
//		downstream, _ := netpoll.DialConnection("tcp", downstreamAddr, time.Second)
//		return context.WithValue(ctx, downstreamKey, downstream)
//	}
//
//	func onRequest(ctx context.Context, upstream netpoll.Connection) error {
//		downstream := ctx.Value(downstreamKey).(netpoll.Connection)
//	}
type OnConnect func(ctx context.Context, connection Connection) context.Context

// OnDisconnect is called once the connection is going to be closed.
// OnDisconnect must return as quickly as possible because it will block the poller.
// OnDisconnect is different from CloseCallback; see the "The Connection Callback Sequence Diagram" section.
type OnDisconnect func(ctx context.Context, connection Connection)

// OnRequest defines the function for handling a connection. When data is sent from the connection peer,
// netpoll actively reads the data in LT mode and places it in the connection's input buffer.
// Generally, OnRequest starts handling the data in the following way:
//
//	func OnRequest(ctx context.Context, connection Connection) error {
//		input, _ := connection.Reader().Next(n)
//		handling input data...
//		send, _ := connection.Writer().Malloc(l)
//		copy(send, output)
//		connection.Flush()
//		return nil
//	}
//
// OnRequest will run in a separate goroutine and
// it is guaranteed that there is one and only one OnRequest running at the same time.
// The underlying logic is similar to:
//
//	go func() {
//		for !connection.Reader().IsEmpty() {
//			OnRequest(ctx, connection)
//		}
//	}()
//
// PLEASE NOTE:
// OnRequest must either eventually read all the input data or actively Close the connection,
// otherwise the goroutine will fall into an endless loop.
//
// Return: the error is unused and will be ignored directly.
type OnRequest func(ctx context.Context, connection Connection) error

================================================
FILE: fd_operator.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"runtime"
	"sync/atomic"
)

// FDOperator is a collection of operations on file descriptors.
type FDOperator struct {
	// FD is the file descriptor; the poll binds to it on registration.
	FD int

	// The FDOperator provides three operations: reading, writing, and hanging up.
	// The poll actively fires the FDOperator when the fd changes, and does not check the return value of FDOperator.
	OnRead  func(p Poll) error
	OnWrite func(p Poll) error
	OnHup   func(p Poll) error

	// The following fns are required; they must exist when used, or it will panic directly.
	// These fns are only called by the poll when it handles connection events.
	Inputs   func(vs [][]byte) (rs [][]byte)
	InputAck func(n int) (err error)

	// Outputs will be locked if len(rs) > 0, and must then be unlocked by OutputAck.
	// supportZeroCopy is not implemented, and it will be ignored
	Outputs   func(vs [][]byte) (rs [][]byte, supportZeroCopy bool)
	OutputAck func(n int) (err error)

	// poll is the registered location of the file descriptor.
	poll Poll

	// detached ensures the operator is only detached once (see Control).
	detached int32

	// private, used by operatorCache
	next  *FDOperator
	state int32 // CAS: 0(unused) 1(inuse) 2(do-done)
	index int32 // index in operatorCache
}

// Control forwards event to the poll this operator is registered on.
// For PollDetach, only the first call is forwarded: detached is incremented
// atomically and every later PollDetach returns nil without touching the poll.
func (op *FDOperator) Control(event PollEvent) error {
	if event == PollDetach && atomic.AddInt32(&op.detached, 1) > 1 {
		return nil
	}
	return op.poll.Control(op, event)
}

// Free hands the operator to its poll via Poll.Free.
func (op *FDOperator) Free() {
	op.poll.Free(op)
}

// do tries to move state from 1 (inuse) to 2 (do-done) and reports whether
// this caller performed the transition.
func (op *FDOperator) do() (can bool) {
	return atomic.CompareAndSwapInt32(&op.state, 1, 2)
}

// done stores state 1 (inuse); it is the counterpart of a successful do.
func (op *FDOperator) done() {
	atomic.StoreInt32(&op.state, 1)
}

// inuse returns once state is 1: either it moved the state from 0 to 1 itself,
// or it observed that the state is already 1. While the state holds any other
// value (i.e. 2, between do and done) it yields the processor and retries.
func (op *FDOperator) inuse() {
	for !atomic.CompareAndSwapInt32(&op.state, 0, 1) {
		if atomic.LoadInt32(&op.state) == 1 {
			return
		}
		runtime.Gosched()
	}
}

// unused returns once state is 0: either it moved the state from 1 to 0 itself,
// or it observed that the state is already 0. While the state holds any other
// value (i.e. 2, between do and done) it yields the processor and retries.
func (op *FDOperator) unused() {
	for !atomic.CompareAndSwapInt32(&op.state, 1, 0) {
		if atomic.LoadInt32(&op.state) == 0 {
			return
		}
		runtime.Gosched()
	}
}

// isUnused reports whether state is currently 0 (unused).
func (op *FDOperator) isUnused() bool {
	return atomic.LoadInt32(&op.state) == 0
}

// reset clears the FD, all callbacks, the poll and the detached counter.
// The operatorCache bookkeeping fields (next, state, index) are left untouched.
func (op *FDOperator) reset() {
	op.FD = 0
	op.OnRead, op.OnWrite, op.OnHup = nil, nil, nil
	op.Inputs, op.InputAck = nil, nil
	op.Outputs, op.OutputAck = nil, nil
	op.poll = nil
	op.detached = 0
}

================================================
FILE: fd_operator_cache.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"runtime"
	"sync/atomic"
	"unsafe"
)

// newOperatorCache returns an empty operatorCache whose cache and freelist
// slices are pre-sized for 1024 entries.
func newOperatorCache() *operatorCache {
	return &operatorCache{
		cache:    make([]*FDOperator, 0, 1024),
		freelist: make([]int32, 0, 1024),
	}
}

// operatorCache hands out FDOperators from a singly linked list of available
// operators (first) and keeps every operator it ever created in cache.
type operatorCache struct {
	first  *FDOperator   // head of the list of operators available to alloc
	cache  []*FDOperator // every operator created by this cache, addressed by FDOperator.index
	locked int32         // spin lock (see lock/unlock) guarding first and cache
	// freelist store the freeable operator
	// to reduce GC pressure, we only store op index here
	freelocked int32 // spin lock guarding freelist
	freelist   []int32
}

// alloc pops an operator from the available list. When the list is empty it
// first creates a batch of block4k/sizeof(FDOperator) operators (at least one),
// records each in cache under its index and links them into the list.
func (c *operatorCache) alloc() *FDOperator {
	lock(&c.locked)
	if c.first == nil {
		const opSize = unsafe.Sizeof(FDOperator{})
		n := block4k / opSize
		if n == 0 {
			n = 1
		}
		// index continues from the current cache length so that cache[op.index] == op.
		index := int32(len(c.cache))
		for i := uintptr(0); i < n; i++ {
			pd := &FDOperator{index: index}
			c.cache = append(c.cache, pd)
			pd.next = c.first
			c.first = pd
			index++
		}
	}
	op := c.first
	c.first = op.next
	unlock(&c.locked)
	return op
}

// freeable mark the operator that could be freed
// only poller could do the real free action
func (c *operatorCache) freeable(op *FDOperator) {
	// reset all state
	op.unused()
	op.reset()
	// Only the index is queued; free() later resolves it through c.cache.
	lock(&c.freelocked)
	c.freelist = append(c.freelist, op.index)
	unlock(&c.freelocked)
}

// free moves every operator queued by freeable back onto the available list
// and empties the freelist. It is a no-op when nothing is queued.
func (c *operatorCache) free() {
	lock(&c.freelocked)
	defer unlock(&c.freelocked)
	if len(c.freelist) == 0 {
		return
	}

	// c.locked is taken while c.freelocked is still held.
	lock(&c.locked)
	for _, idx := range c.freelist {
		op := c.cache[idx]
		op.next = c.first
		c.first = op
	}
	c.freelist = c.freelist[:0]
	unlock(&c.locked)
}

// lock spins, yielding the processor between attempts, until *locked has been
// switched from 0 to 1.
func lock(locked *int32) {
	for !atomic.CompareAndSwapInt32(locked, 0, 1) {
		runtime.Gosched()
	}
}

// unlock releases a lock taken with lock by storing 0.
func unlock(locked *int32) {
	atomic.StoreInt32(locked, 0)
}

================================================
FILE: fd_operator_cache_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"runtime"
	"testing"
)

// TestPersistFDOperator allocates 2048 operators, forces several GC cycles and
// then checks that every operator still carries the FD written before the GC,
// and that freeable/free fill and drain the freelist as expected.
//
// go test -v -gcflags=-d=checkptr -run=TestPersistFDOperator
func TestPersistFDOperator(t *testing.T) {
	opcache := newOperatorCache()
	// init
	size := 2048
	ops := make([]*FDOperator, size)
	for i := 0; i < size; i++ {
		op := opcache.alloc()
		op.FD = i
		ops[i] = op
	}
	Equal(t, len(opcache.freelist), 0)
	// gc
	for i := 0; i < 4; i++ {
		runtime.GC()
	}
	// check alloc
	for i := range ops {
		Equal(t, ops[i].FD, i)
		opcache.freeable(ops[i])
		Equal(t, len(opcache.freelist), i+1)
	}
	Equal(t, len(opcache.freelist), size)
	opcache.free()
	Equal(t, len(opcache.freelist), 0)
	Assert(t, len(opcache.cache) >= size)
}

// BenchmarkPersistFDOperator1 measures a sequential alloc/freeable/free cycle.
func BenchmarkPersistFDOperator1(b *testing.B) {
	b.ReportAllocs()
	b.ResetTimer()
	opcache := newOperatorCache()
	for i := 0; i < b.N; i++ {
		op := opcache.alloc()
		opcache.freeable(op)
		opcache.free()
	}
}

// BenchmarkPersistFDOperator2 measures the same alloc/freeable/free cycle run
// from parallel goroutines sharing one cache.
func BenchmarkPersistFDOperator2(b *testing.B) {
	// benchmark
	b.ReportAllocs()
	b.SetParallelism(128)
	b.ResetTimer()
	opcache := newOperatorCache()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			op := opcache.alloc()
			opcache.freeable(op)
			opcache.free()
		}
	})
}

================================================
FILE: go.mod
================================================
module github.com/cloudwego/netpoll

go 1.15

require (
	github.com/bytedance/gopkg v0.1.1
github.com/cloudwego/gopkg v0.1.4 golang.org/x/sys v0.19.0 ) ================================================ FILE: go.sum ================================================ github.com/bytedance/gopkg v0.1.1 h1:3azzgSkiaw79u24a+w9arfH8OfnQQ4MHUt9lJFREEaE= github.com/bytedance/gopkg v0.1.1/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM= github.com/cloudwego/gopkg v0.1.4 h1:EoQiCG4sTonTPHxOGE0VlQs+sQR+Hsi2uN0qqwu8O50= github.com/cloudwego/gopkg v0.1.4/go.mod h1:FQuXsRWRsSqJLsMVd5SYzp8/Z1y5gXKnVvRrWUOsCMI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod 
h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= golang.org/x/term v0.19.0/go.mod h1:2CuTdWZ7KHSQwUzKva0cbMg6q2DMI3Mmxp+gKJbskEk= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools 
v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= ================================================ FILE: internal/runner/runner.go ================================================ /* * Copyright 2025 CloudWeGo Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package runner import ( "context" "os" "strconv" bgopool "github.com/bytedance/gopkg/util/gopool" cgopool "github.com/cloudwego/gopkg/concurrency/gopool" ) // RunTask runs the `f` in background, and `ctx` is optional. 
// `ctx` is used to pass to underlying implementation var RunTask func(ctx context.Context, f func()) func goRunTask(ctx context.Context, f func()) { go f() } func init() { // netpoll uses github.com/bytedance/gopkg/util/gopool by default // if the env is set, change it to cloudwego/gopkg // for most users, using the 'go' keyword directly is more suitable. if yes, _ := strconv.ParseBool(os.Getenv("USE_CLOUDWEGO_GOPOOL")); yes { RunTask = cgopool.CtxGo } else { RunTask = bgopool.CtxGo } } // UseGoRunTask updates RunTask with goRunTask which creates // a new goroutine for the given func, basically `go f()` func UseGoRunTask() { RunTask = goRunTask } // SetPanicHandler sets the panic handler for the global pool. func SetPanicHandler(f func(context.Context, interface{})) { bgopool.SetPanicHandler(f) cgopool.SetPanicHandler(f) } ================================================ FILE: internal/runner/runner_test.go ================================================ /* * Copyright 2025 CloudWeGo Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package runner import ( "context" "sync" "testing" ) func TestRunTask(t *testing.T) { var wg sync.WaitGroup wg.Add(2) ctx := context.Background() RunTask(ctx, func() { wg.Done() }) UseGoRunTask() RunTask(ctx, func() { wg.Done() }) wg.Wait() } ================================================ FILE: lint.sh ================================================ #!/usr/bin/env bash golangci-lint run ================================================ FILE: mux/mux_test.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !windows // +build !windows package mux import ( "testing" ) func MustNil(t *testing.T, val interface{}) { t.Helper() Assert(t, val == nil, val) if val != nil { t.Fatal("assertion nil failed, val=", val) } } func MustTrue(t *testing.T, cond bool) { t.Helper() if !cond { t.Fatal("assertion true failed.") } } func Equal(t *testing.T, got, expect interface{}) { t.Helper() if got != expect { t.Fatalf("assertion equal failed, got=[%v], expect=[%v]", got, expect) } } func Assert(t *testing.T, cond bool, val ...interface{}) { t.Helper() if !cond { if len(val) > 0 { val = append([]interface{}{"assertion failed:"}, val...) t.Fatal(val...) 
} else { t.Fatal("assertion failed") } } } ================================================ FILE: mux/shard_queue.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package mux import ( "fmt" "runtime" "sync" "sync/atomic" "github.com/cloudwego/netpoll" "github.com/cloudwego/netpoll/internal/runner" ) /* DOC: * ShardQueue uses the netpoll's nocopy API to merge and send data. * The Data Flush is passively triggered by ShardQueue.Add and does not require user operations. * If there is an error in the data transmission, the connection will be closed. * * ShardQueue.Add: add the data to be sent. * NewShardQueue: create a queue with netpoll.Connection. * ShardSize: the recommended number of shards is 32. */ var ShardSize int func init() { ShardSize = runtime.GOMAXPROCS(0) } // NewShardQueue . func NewShardQueue(size int, conn netpoll.Connection) (queue *ShardQueue) { queue = &ShardQueue{ conn: conn, size: int32(size), getters: make([][]WriterGetter, size), swap: make([]WriterGetter, 0, 64), locks: make([]int32, size), } for i := range queue.getters { queue.getters[i] = make([]WriterGetter, 0, 64) } queue.list = make([]int32, size) return queue } // WriterGetter is used to get a netpoll.Writer. type WriterGetter func() (buf netpoll.Writer, isNil bool) // ShardQueue uses the netpoll's nocopy API to merge and send data. 
// The Data Flush is passively triggered by ShardQueue.Add and does not require user operations. // If there is an error in the data transmission, the connection will be closed. // ShardQueue.Add: add the data to be sent. type ShardQueue struct { conn netpoll.Connection idx, size int32 getters [][]WriterGetter // len(getters) = size swap []WriterGetter // use for swap locks []int32 // len(locks) = size queueTrigger } const ( // queueTrigger state active = 0 closing = 1 closed = 2 ) // here for trigger type queueTrigger struct { trigger int32 state int32 // 0: active, 1: closing, 2: closed runNum int32 w, r int32 // ptr of list list []int32 // record the triggered shard listLock sync.Mutex // list total lock } // Add adds to q.getters[shard] func (q *ShardQueue) Add(gts ...WriterGetter) { if atomic.LoadInt32(&q.state) != active { return } shard := atomic.AddInt32(&q.idx, 1) % q.size q.lock(shard) trigger := len(q.getters[shard]) == 0 q.getters[shard] = append(q.getters[shard], gts...) q.unlock(shard) if trigger { q.triggering(shard) } } func (q *ShardQueue) Close() error { if !atomic.CompareAndSwapInt32(&q.state, active, closing) { return fmt.Errorf("shardQueue has been closed") } // wait for all tasks finished for atomic.LoadInt32(&q.state) != closed { if atomic.LoadInt32(&q.trigger) == 0 { atomic.StoreInt32(&q.state, closed) return nil } runtime.Gosched() } return nil } // triggering shard. func (q *ShardQueue) triggering(shard int32) { q.listLock.Lock() q.w = (q.w + 1) % q.size q.list[q.w] = shard q.listLock.Unlock() if atomic.AddInt32(&q.trigger, 1) > 1 { return } q.foreach() } // foreach swap r & w. It's not concurrency safe. 
func (q *ShardQueue) foreach() {
	// Only the caller that raises runNum from 0 starts a task; others return.
	if atomic.AddInt32(&q.runNum, 1) > 1 {
		return
	}
	// NOTE(review): a nil context is passed here; confirm that every RunTask
	// implementation accepts it.
	runner.RunTask(nil, func() {
		var negNum int32 // is negative number of triggerNum
		for triggerNum := atomic.LoadInt32(&q.trigger); triggerNum > 0; {
			q.r = (q.r + 1) % q.size
			shared := q.list[q.r]

			// lock & swap
			q.lock(shared)
			tmp := q.getters[shared]
			q.getters[shared] = q.swap[:0]
			q.swap = tmp
			q.unlock(shared)

			// deal
			q.deal(q.swap)
			negNum--
			// Once the triggers counted at the last load are consumed, subtract
			// them and pick up any triggers that arrived meanwhile.
			if triggerNum+negNum == 0 {
				triggerNum = atomic.AddInt32(&q.trigger, negNum)
				negNum = 0
			}
		}
		q.flush()

		// quit & check again
		atomic.StoreInt32(&q.runNum, 0)
		if atomic.LoadInt32(&q.trigger) > 0 {
			q.foreach()
			return
		}
		// if state is closing, change it to closed
		atomic.CompareAndSwapInt32(&q.state, closing, closed)
	})
}

// deal calls every getter and appends the returned writers to the connection's
// writer. It does nothing when the connection is not active. On the first
// Append error it closes the connection and skips the remaining getters.
func (q *ShardQueue) deal(gts []WriterGetter) {
	if !q.conn.IsActive() {
		return
	}
	writer := q.conn.Writer()
	for _, gt := range gts {
		buf, isNil := gt()
		if !isNil {
			err := writer.Append(buf)
			if err != nil {
				q.conn.Close()
				return
			}
		}
	}
}

// flush flushes the connection's writer and closes the connection when the
// flush fails.
func (q *ShardQueue) flush() {
	err := q.conn.Writer().Flush()
	if err != nil {
		q.conn.Close()
		return
	}
}

// lock spins, yielding the processor between attempts, until the lock of the
// given shard has been acquired.
func (q *ShardQueue) lock(shard int32) {
	for !atomic.CompareAndSwapInt32(&q.locks[shard], 0, 1) {
		runtime.Gosched()
	}
}

// unlock releases the lock of the given shard.
func (q *ShardQueue) unlock(shard int32) {
	atomic.StoreInt32(&q.locks[shard], 0)
}

================================================
FILE: mux/shard_queue_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package mux

import (
	"net"
	"testing"
	"time"

	"github.com/cloudwego/netpoll"
)

// TestShardQueue sends 16 packets of 11 bytes through a 4-shard queue over a
// local TCP connection and checks that the server side reads all of them.
func TestShardQueue(t *testing.T) {
	var svrConn net.Conn
	accepted := make(chan struct{})

	network, address := "tcp", "localhost:12345"
	ln, err := net.Listen("tcp", address)
	MustNil(t, err)
	stop := make(chan int, 1)
	defer close(stop)
	go func() {
		var err error
		for {
			select {
			case <-stop:
				err = ln.Close()
				MustNil(t, err)
				return
			default:
			}
			svrConn, err = ln.Accept()
			MustNil(t, err)
			// Signals the test goroutine that svrConn has been set.
			accepted <- struct{}{}
		}
	}()

	conn, err := netpoll.DialConnection(network, address, time.Second)
	MustNil(t, err)
	<-accepted

	// test
	queue := NewShardQueue(4, conn)
	count, pkgsize := 16, 11
	for i := 0; i < count; i++ {
		var getter WriterGetter = func() (buf netpoll.Writer, isNil bool) {
			buf = netpoll.NewLinkBuffer(pkgsize)
			buf.Malloc(pkgsize)
			return buf, false
		}
		queue.Add(getter)
	}

	err = queue.Close()
	MustNil(t, err)
	total := count * pkgsize
	recv := make([]byte, total)
	// NOTE(review): a single Read is expected to return all bytes at once.
	rn, err := svrConn.Read(recv)
	MustNil(t, err)
	Equal(t, rn, total)
}

// BenchmarkShardQueue is currently skipped.
//
// TODO: need mock flush
func BenchmarkShardQueue(b *testing.B) {
	b.Skip()
}

================================================
FILE: net_dialer.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !windows // +build !windows package netpoll import ( "context" "net" "time" ) // DialConnection is a default implementation of Dialer. func DialConnection(network, address string, timeout time.Duration) (connection Connection, err error) { return defaultDialer.DialConnection(network, address, timeout) } // NewFDConnection create a Connection initialized by any fd // It's useful for writing unit tests for functions that have args with the type of netpoll.Connection // The typical usage is like: // // rfd, wfd := netpoll.GetSysFdPairs() // rconn, _ = netpoll.NewFDConnection(rfd) // wconn, _ = netpoll.NewFDConnection(wfd) func NewFDConnection(fd int) (Connection, error) { conn := new(connection) err := conn.init(&netFD{fd: fd}, nil) if err != nil { return nil, err } return conn, nil } // NewDialer only support TCP and unix socket now. func NewDialer() Dialer { return &dialer{} } var defaultDialer = NewDialer() type dialer struct{} // DialTimeout implements Dialer. func (d *dialer) DialTimeout(network, address string, timeout time.Duration) (net.Conn, error) { return d.DialConnection(network, address, timeout) } // DialConnection implements Dialer. 
func (d *dialer) DialConnection(network, address string, timeout time.Duration) (connection Connection, err error) {
	// A positive timeout bounds the whole dial through a context deadline.
	ctx := context.Background()
	if timeout > 0 {
		subCtx, cancel := context.WithTimeout(ctx, timeout)
		defer cancel()
		ctx = subCtx
	}

	switch network {
	case "tcp", "tcp4", "tcp6":
		// dialTCP returns a *TCPConnection, so when it fails the returned
		// Connection interface wraps a nil *TCPConnection rather than being nil.
		return d.dialTCP(ctx, network, address)
	case "unix", "unixgram", "unixpacket":
		raddr := &UnixAddr{
			UnixAddr: net.UnixAddr{Name: address, Net: network},
		}
		// NOTE(review): ctx is not passed to DialUnix here; confirm whether the
		// timeout is meant to apply to unix sockets.
		return DialUnix(network, nil, raddr)
	default:
		return nil, net.UnknownNetworkError(network)
	}
}

// dialTCP resolves address and tries every resolved IP in order until one
// dial succeeds. When all attempts fail it returns the error of the first
// attempt; when ctx is done after a failed attempt it returns that attempt's
// error immediately.
func (d *dialer) dialTCP(ctx context.Context, network, address string) (connection *TCPConnection, err error) {
	host, port, err := net.SplitHostPort(address)
	if err != nil {
		return nil, err
	}
	var portnum int
	if portnum, err = net.DefaultResolver.LookupPort(ctx, network, port); err != nil {
		return nil, err
	}
	var ipaddrs []net.IPAddr
	// host maybe empty if address is :12345
	if host == "" {
		ipaddrs = []net.IPAddr{{}}
	} else {
		ipaddrs, err = net.DefaultResolver.LookupIPAddr(ctx, host)
		if err != nil {
			return nil, err
		}
		if len(ipaddrs) == 0 {
			return nil, &net.DNSError{Err: "no such host", Name: host, IsNotFound: true}
		}
	}

	var firstErr error // The error from the first address is most relevant.
	tcpAddr := &TCPAddr{}
	for _, ipaddr := range ipaddrs {
		tcpAddr.IP = ipaddr.IP
		tcpAddr.Port = portnum
		tcpAddr.Zone = ipaddr.Zone
		// A non-nil IP without a 4-byte form is dialed as tcp6, anything else as tcp.
		if ipaddr.IP != nil && ipaddr.IP.To4() == nil {
			connection, err = DialTCP(ctx, "tcp6", nil, tcpAddr)
		} else {
			connection, err = DialTCP(ctx, "tcp", nil, tcpAddr)
		}
		if err == nil {
			return connection, nil
		}
		select {
		case <-ctx.Done(): // check timeout error
			return nil, err
		default:
		}
		if firstErr == nil {
			firstErr = err
		}
	}
	if firstErr == nil {
		firstErr = &net.OpError{Op: "dial", Net: network, Source: nil, Addr: nil, Err: errMissingAddress}
	}
	return nil, firstErr
}

// sysDialer contains a Dial's parameters and configuration.
type sysDialer struct {
	net.Dialer
	network, address string
}

================================================
FILE: net_dialer_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
	"fmt"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"testing"
	"time"
)

// TestDialerTCP checks that dialing fails while nothing listens on the test
// address and succeeds, with the expected addresses, once a listener exists.
func TestDialerTCP(t *testing.T) {
	dialer := NewDialer()
	address := getTestAddress()
	conn, err := dialer.DialTimeout("tcp", address, time.Second)
	MustTrue(t, err != nil)
	// On failure the returned net.Conn still has dynamic type *TCPConnection
	// (with a nil pointer), which is what this assertion relies on.
	MustTrue(t, conn.(*TCPConnection) == nil)

	ln, err := CreateListener("tcp", address)
	MustNil(t, err)

	stop := make(chan int, 1)
	defer close(stop)

	go func() {
		for {
			select {
			case <-stop:
				err := ln.Close()
				MustNil(t, err)
				return
			default:
			}
			conn, err := ln.Accept()
			if conn == nil && err == nil {
				continue
			}
		}
	}()

	conn, err = dialer.DialTimeout("tcp", address, time.Second)
	MustNil(t, err)
	MustTrue(t, strings.HasPrefix(conn.LocalAddr().String(), "127.0.0.1:"))
	Equal(t, conn.RemoteAddr().String(), address)
}

// TestDialerUnix is the unix-socket counterpart of TestDialerTCP, using the
// socket path "tmp.sock".
func TestDialerUnix(t *testing.T) {
	dialer := NewDialer()
	conn, err := dialer.DialTimeout("unix", "tmp.sock", time.Second)
	MustTrue(t, err != nil)
	MustTrue(t, conn.(*UnixConnection) == nil)

	ln, err := CreateListener("unix", "tmp.sock")
	MustNil(t, err)
	defer ln.Close()

	stop := make(chan int, 1)
	defer func() {
		close(stop)
		time.Sleep(time.Millisecond)
	}()

	go func() {
		for {
			select {
			case <-stop:
				err := ln.Close()
				MustNil(t, err)
				return
			default:
			}
			conn, err := ln.Accept()
			if conn == nil && err == nil {
				continue
			}
		}
	}()

	conn, err = dialer.DialTimeout("unix", "tmp.sock", time.Second)
	MustNil(t, err)
	// The expected local address of the dialing side differs by OS.
	if runtime.GOOS == "linux" {
		Equal(t, conn.LocalAddr().String(), "@")
	} else {
		Equal(t, conn.LocalAddr().String(), "")
	}
	Equal(t, conn.RemoteAddr().String(), "tmp.sock")
}

// TestDialerFdAlloc dials 100 connections against a server that closes every
// connection on its first request, waits for each client connection to become
// inactive and then calls SetNonblock on its former fd number.
func TestDialerFdAlloc(t *testing.T) {
	address := getTestAddress()
	ln, err := CreateListener("tcp", address)
	MustNil(t, err)
	defer ln.Close()
	el1, _ := NewEventLoop(func(ctx context.Context, connection Connection) error {
		connection.Close()
		return nil
	})
	go func() {
		el1.Serve(ln)
	}()
	ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second)
	defer cancel1()
	defer el1.Shutdown(ctx1)

	for i := 0; i < 100; i++ {
		conn, err := DialConnection("tcp", address, time.Second)
		MustNil(t, err)
		fd := conn.(*TCPConnection).fd
		conn.Write([]byte("hello world"))
		for conn.IsActive() {
			runtime.Gosched()
		}
		time.Sleep(time.Millisecond)
		syscall.SetNonblock(fd, true)
	}
}

// TestFDClose dials two connections in sequence against a server that closes
// every connection on its first request, calling SetNonblock on each fd before
// closing the client side.
func TestFDClose(t *testing.T) {
	address := getTestAddress()
	ln, err := CreateListener("tcp", address)
	MustNil(t, err)
	defer ln.Close()
	el1, _ := NewEventLoop(func(ctx context.Context, connection Connection) error {
		connection.Close()
		return nil
	})
	go func() {
		el1.Serve(ln)
	}()
	ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second)
	defer cancel1()
	defer el1.Shutdown(ctx1)

	var fd int
	var conn Connection
	conn, err = DialConnection("tcp", address, time.Second)
	MustNil(t, err)
	fd = conn.(*TCPConnection).fd
	syscall.SetNonblock(fd, true)
	conn.Close()

	conn, err = DialConnection("tcp", address, time.Second)
	MustNil(t, err)
	fd = conn.(*TCPConnection).fd
	syscall.SetNonblock(fd, true)
	time.Sleep(time.Second)
	conn.Close()
}

// fd data package race test, use two servers and two dialers.
func TestDialerThenClose(t *testing.T) {
	address1 := getTestAddress()
	address2 := getTestAddress()
	// server 1
	ln1, _ := createTestListener("tcp", address1)
	el1 := mockDialerEventLoop(1)
	go func() {
		el1.Serve(ln1)
	}()
	ctx1, cancel1 := context.WithTimeout(context.Background(), time.Second)
	defer cancel1()
	defer el1.Shutdown(ctx1)

	// server 2
	ln2, _ := createTestListener("tcp", address2)
	el2 := mockDialerEventLoop(2)
	go func() {
		el2.Serve(ln2)
	}()
	ctx2, cancel2 := context.WithTimeout(context.Background(), time.Second)
	defer cancel2()
	defer el2.Shutdown(ctx2)

	// 20 goroutines each alternate 50 times between the two servers.
	size := 20
	var wg sync.WaitGroup
	wg.Add(size)
	for i := 0; i < size; i++ {
		go func() {
			defer wg.Done()
			for i := 0; i < 50; i++ {
				// send server 1
				conn, err := DialConnection("tcp", address1, time.Second)
				if err == nil {
					mockDialerSend(1, &conn.(*TCPConnection).connection)
				}
				// send server 2
				conn, err = DialConnection("tcp", address2, time.Second)
				if err == nil {
					mockDialerSend(2, &conn.(*TCPConnection).connection)
				}
			}
		}()
	}
	wg.Wait()
}

// TestNewFDConnection wraps both ends of a system fd pair into Connections,
// writes "hello" through one and reads it back from the other.
func TestNewFDConnection(t *testing.T) {
	r, w := GetSysFdPairs()
	rconn, err := NewFDConnection(r)
	MustNil(t, err)
	wconn, err := NewFDConnection(w)
	MustNil(t, err)
	// Note: data is written via rconn and read via wconn.
	_, err = rconn.Writer().WriteString("hello")
	MustNil(t, err)
	err = rconn.Writer().Flush()
	MustNil(t, err)
	buf, err := wconn.Reader().Next(5)
	MustNil(t, err)
	Equal(t, string(buf), "hello")
}

// mockDialerEventLoop builds an EventLoop for test server idx. Its handler
// reads a message of the form "<idx>-<fd>", panics when the first byte does
// not match idx, and answers with "<idx>-<server side fd>". On any error the
// handler prints it and closes the connection.
func mockDialerEventLoop(idx int) EventLoop {
	el, _ := NewEventLoop(func(ctx context.Context, conn Connection) (err error) {
		defer func() {
			if err != nil {
				fmt.Printf("Error: server%d conn closed: %s", idx, err.Error())
				conn.Close()
			}
		}()
		operator := conn.(*connection)
		fd := operator.fd
		msg := make([]byte, 15)
		n, err := operator.Read(msg)
		if err != nil {
			fmt.Printf("Error: conn[%d] server%d-read fail: %s", operator.fd, idx, err.Error())
			return err
		}
		if n < 1 {
			return nil
		}
		// A message addressed to the other server indicates mixed-up fd data.
		if string(msg[0]) != strconv.Itoa(idx) {
			panic(fmt.Sprintf("msg[%s] != [%d-xxx]", msg, idx))
		}

		ss := strings.Split(string(msg[:n]), "-")
		rfd, _ := strconv.Atoi(ss[1])
		_, err = operator.Write([]byte(fmt.Sprintf("%d-%d", idx, fd)))
		if err != nil {
			fmt.Printf("Error: conn[%d] rfd[%d] server%d-write fail: %s", operator.fd, rfd, idx, err.Error())
			return err
		}
		return nil
	})
	return el
}

// mockDialerSend writes "<idx>-<fd>" to conn, reads the reply into a 15-byte
// buffer and always closes conn. Errors are only printed.
func mockDialerSend(idx int, conn *connection) {
	defer func() {
		conn.Close()
	}()
	randID1 := []byte(fmt.Sprintf("%d-%d", idx, conn.fd))
	_, err := conn.Write(randID1)
	if err != nil {
		fmt.Printf("Error: conn[%d] client%d write fail: %s", conn.fd, idx, err.Error())
	}
	msg := make([]byte, 15)
	_, err = conn.Read(msg)
	if err != nil {
		fmt.Printf("Error: conn[%d] client%d Next fail: %s", conn.fd, idx, err.Error())
	}
}

================================================
FILE: net_io.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

import "syscall"

// ioread reads from fd into bs via readv.
//
// return value:
//   - n: n == 0 but err == nil, retry syscall
//   - err: if not nil, connection should be closed.
//
// A read of zero bytes without an error is reported as an ErrEOF exception;
// EINTR and EAGAIN are reported as (0, nil).
func ioread(fd int, bs [][]byte, ivs []syscall.Iovec) (n int, err error) {
	n, err = readv(fd, bs, ivs)
	if n == 0 && err == nil { // means EOF
		return 0, Exception(ErrEOF, "")
	}
	if err == syscall.EINTR || err == syscall.EAGAIN {
		return 0, nil
	}
	return n, err
}

// return value:
//   - n: n == 0 but err == nil, retry syscall
//   - err: if not nil, connection should be closed.
func iosend(fd int, bs [][]byte, ivs []syscall.Iovec, zerocopy bool) (n int, err error) { n, err = sendmsg(fd, bs, ivs, zerocopy) if err == syscall.EAGAIN { return 0, nil } return n, err } ================================================ FILE: net_listener.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux // +build darwin netbsd freebsd openbsd dragonfly linux package netpoll import ( "errors" "net" "os" "syscall" ) // CreateListener return a new Listener. func CreateListener(network, addr string) (l Listener, err error) { if network == "udp" || network == "udp4" || network == "udp6" { return nil, Exception(ErrUnsupported, "UDP") } // tcp, tcp4, tcp6, unix ln, err := net.Listen(network, addr) if err != nil { return nil, err } return ConvertListener(ln) } // ConvertListener converts net.Listener to Listener func ConvertListener(l net.Listener) (nl Listener, err error) { if tmp, ok := l.(Listener); ok { return tmp, nil } ln := &listener{} ln.ln = l ln.addr = l.Addr() err = ln.parseFD() if err != nil { return nil, err } return ln, syscall.SetNonblock(ln.fd, true) } var _ net.Listener = &listener{} type listener struct { fd int addr net.Addr // listener's local addr ln net.Listener // tcp|unix listener file *os.File } // Accept implements Listener. 
func (ln *listener) Accept() (net.Conn, error) { fd, sa, err := syscall.Accept(ln.fd) if err != nil { /* https://man7.org/linux/man-pages/man2/accept.2.html EAGAIN or EWOULDBLOCK The socket is marked nonblocking and no connections are present to be accepted. POSIX.1-2001 and POSIX.1-2008 allow either error to be returned for this case, and do not require these constants to have the same value, so a portable application should check for both possibilities. */ if err == syscall.EAGAIN || err == syscall.EWOULDBLOCK { return nil, nil } return nil, err } nfd := &netFD{} nfd.fd = fd nfd.localAddr = ln.addr nfd.network = ln.addr.Network() nfd.remoteAddr = sockaddrToAddr(sa) return nfd, nil } // Close implements Listener. func (ln *listener) Close() error { if ln.fd != 0 { syscall.Close(ln.fd) } if ln.file != nil { ln.file.Close() } if ln.ln != nil { ln.ln.Close() } return nil } // Addr implements Listener. func (ln *listener) Addr() net.Addr { return ln.addr } // Fd implements Listener. func (ln *listener) Fd() (fd int) { return ln.fd } func (ln *listener) parseFD() (err error) { switch netln := ln.ln.(type) { case *net.TCPListener: ln.file, err = netln.File() case *net.UnixListener: ln.file, err = netln.File() default: return errors.New("listener type can't support") } if err != nil { return err } ln.fd = int(ln.file.Fd()) return nil } ================================================ FILE: net_listener_test.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux
// +build darwin netbsd freebsd openbsd dragonfly linux

package netpoll

import (
	"context"
	"net"
	"sync/atomic"
	"testing"
	"time"
)

// TestListenerDialer accepts connections on a listener created by
// CreateListener and, for each of 10 dial attempts, checks that the server
// side echoes the 10-byte message and that both the OnRequest handler and
// the close callback of the client connection have fired.
func TestListenerDialer(t *testing.T) {
	network := "tcp"
	addr := getTestAddress()
	ln, err := CreateListener(network, addr)
	MustNil(t, err)
	defer ln.Close()
	trigger := make(chan int)
	msg := []byte("0123456789")

	go func() {
		for {
			conn, err := ln.Accept()
			// (nil, nil) means no connection is pending on the non-blocking listener.
			if conn == nil && err == nil {
				continue
			}
			if err != nil {
				return
			}
			go func(conn net.Conn) {
				// wait until the client has written its message
				<-trigger
				buf := make([]byte, 10)
				n, err := conn.Read(buf)
				MustNil(t, err)
				Equal(t, n, len(msg))
				Equal(t, string(buf[:n]), string(msg))
				n, err = conn.Write(buf)
				MustNil(t, err)
				Equal(t, n, len(msg))
			}(conn)
		}
	}()

	// trigger
	var closed, read int32

	dialer := NewDialer()
	// callback records that the client connection was closed.
	callback := func(connection Connection) error {
		atomic.StoreInt32(&closed, 1)
		return nil
	}
	// onRequest records that the echo arrived and closes the client connection.
	onRequest := func(ctx context.Context, connection Connection) error {
		atomic.StoreInt32(&read, 1)
		err := connection.Close()
		MustNil(t, err)
		return err
	}
	for i := 0; i < 10; i++ {
		conn, err := dialer.DialConnection(network, addr, time.Second)
		if err != nil {
			continue
		}
		conn.AddCloseCallback(callback)
		conn.SetOnRequest(onRequest)

		MustNil(t, err)
		n, err := conn.Write(msg)
		MustNil(t, err)
		Equal(t, n, len(msg))
		time.Sleep(10 * time.Millisecond)
		trigger <- 1
		time.Sleep(10 * time.Millisecond)
		Equal(t, atomic.LoadInt32(&read), int32(1))
		Equal(t, atomic.LoadInt32(&closed), int32(1))
	}
}

// TestConvertListener converts a unix-domain net.Listener with
// ConvertListener and closes the result; any error panics.
func TestConvertListener(t *testing.T) {
	network, address := "unix", "mock.test.sock"
	ln, err := net.Listen(network, address)
	if err != nil {
		panic(err)
	}
	udsln, _ := ln.(*net.UnixListener)
	// udsln.SetUnlinkOnClose(false)

	nln, err := ConvertListener(udsln)
	if err != nil {
		panic(err)
	}
	err = nln.Close()
	if err != nil {
		panic(err)
	}
}

================================================
FILE:
net_netfd.go
================================================
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”).
// All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors.

//go:build aix || darwin || dragonfly || freebsd || linux || nacl || netbsd || openbsd || solaris
// +build aix darwin dragonfly freebsd linux nacl netbsd openbsd solaris

package netpoll

import (
	"context"
	"errors"
	"net"
	"os"
	"runtime"
	"syscall"
	"time"
)

// noDeadline is just the zero time.Time, named for
// readability with functions taking too many parameters.
var noDeadline = time.Time{}

// netFD is a socket file descriptor together with the metadata recorded
// while creating and connecting it.
type netFD struct {
	// file descriptor
	fd int
	// When calling netFD.dial(), fd is registered with a poller in some scenarios
	// (connect sets pd only while it waits for a pending connect to finish)
	// but not in others. The later poller registration differs between the two,
	// so this field marks which case applies.
	pd *pollDesc
	// closed marks whether fd has been closed; Close increments it atomically.
	closed uint32
	// Whether this is a streaming descriptor. Immutable.
	isStream bool
	// Whether a zero byte read indicates EOF. This is false for a
	// message based socket connection.
	zeroReadIsEOF bool
	family        int    // AF_INET, AF_INET6, syscall.AF_UNIX
	sotype        int    // syscall.SOCK_STREAM, syscall.SOCK_DGRAM, syscall.SOCK_RAW
	isConnected   bool   // handshake completed or use of association with peer
	network       string // tcp, tcp4, tcp6, unix, unixgram, unixpacket
	localAddr     net.Addr
	remoteAddr    net.Addr
	// for detaching conn from poller
	detaching bool
}

// newNetFD wraps fd and records its family, socket type and network name.
// isStream and zeroReadIsEOF are derived from sotype.
func newNetFD(fd, family, sotype int, net string) *netFD {
	ret := &netFD{}
	ret.fd = fd
	ret.network = net
	ret.family = family
	ret.sotype = sotype
	ret.isStream = sotype == syscall.SOCK_STREAM
	ret.zeroReadIsEOF = sotype != syscall.SOCK_DGRAM && sotype != syscall.SOCK_RAW
	return ret
}

// dial binds c to laddr (when it yields a socket address), connects it to
// raddr and records the resulting local and remote addresses.
//
// If dial returns an error, the caller must call netFD.Close itself.
func (c *netFD) dial(ctx context.Context, laddr, raddr sockaddr) (err error) {
	var lsa syscall.Sockaddr
	if laddr != nil {
		if lsa, err = laddr.sockaddr(c.family); err != nil {
			return err
		} else if lsa != nil {
			// bind local address
			if err = syscall.Bind(c.fd, lsa); err != nil {
				return os.NewSyscallError("bind", err)
			}
		}
	}
	var rsa syscall.Sockaddr  // remote address from the user
	var crsa syscall.Sockaddr // remote address we actually connected to
	if raddr != nil {
		if rsa, err = raddr.sockaddr(c.family); err != nil {
			return err
		}
	}
	// remote address we actually connected to
	if crsa, err = c.connect(ctx, lsa, rsa); err != nil {
		return err
	}
	c.isConnected = true

	// Record the local and remote addresses from the actual socket.
	// Get the local address by calling Getsockname.
	// For the remote address, use
	// 1) the one returned by the connect method, if any; or
	// 2) the one from Getpeername, if it succeeds; or
	// 3) the one passed to us as the raddr parameter.
	lsa, _ = syscall.Getsockname(c.fd)
	c.localAddr = sockaddrToAddr(lsa)
	if crsa != nil {
		c.remoteAddr = sockaddrToAddr(crsa)
	} else if crsa, _ = syscall.Getpeername(c.fd); crsa != nil {
		c.remoteAddr = sockaddrToAddr(crsa)
	} else {
		c.remoteAddr = sockaddrToAddr(rsa)
	}
	return nil
}

// connect issues connect(2) on c.fd towards ra. When the connection cannot
// be completed immediately it waits for writability through a temporary
// pollDesc and checks SO_ERROR after each wake-up. rsa is non-nil only when
// the peer address was obtained from Getpeername after waiting. The la
// parameter is not used.
func (c *netFD) connect(ctx context.Context, la, ra syscall.Sockaddr) (rsa syscall.Sockaddr, retErr error) {
	// Do not need to call c.writing here,
	// because c is not yet accessible to user,
	// so no concurrent operations are possible.
	switch err := syscall.Connect(c.fd, ra); err {
	case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
		// connection is pending: fall out of the switch and wait below
	case nil, syscall.EISCONN:
		// connected immediately; still honor a ctx that is already done
		select {
		case <-ctx.Done():
			return nil, mapErr(ctx.Err())
		default:
		}
		return nil, nil
	case syscall.EINVAL:
		// On Solaris we can see EINVAL if the socket has
		// already been accepted and closed by the server.
		// Treat this as a successful connection--writes to
		// the socket will see EOF.  For details and a test
		// case in C see https://golang.org/issue/6828.
		if runtime.GOOS == "solaris" {
			return nil, nil
		}
		fallthrough
	default:
		return nil, os.NewSyscallError("connect", err)
	}

	c.pd = newPollDesc(c.fd)
	defer func() {
		// free operator to avoid leak
		c.pd.operator.Free()
		c.pd = nil
	}()
	for {
		// Performing multiple connect system calls on a
		// non-blocking socket under Unix variants does not
		// necessarily result in earlier errors being
		// returned. Instead, once runtime-integrated network
		// poller tells us that the socket is ready, get the
		// SO_ERROR socket option to see if the connection
		// succeeded or failed. See issue 7474 for further
		// details.
		if err := c.pd.WaitWrite(ctx); err != nil {
			return nil, err
		}
		nerr, err := syscall.GetsockoptInt(c.fd, syscall.SOL_SOCKET, syscall.SO_ERROR)
		if err != nil {
			return nil, os.NewSyscallError("getsockopt", err)
		}
		switch err := syscall.Errno(nerr); err {
		case syscall.EINPROGRESS, syscall.EALREADY, syscall.EINTR:
			// still pending: wait again
		case syscall.EISCONN:
			return nil, nil
		case syscall.Errno(0):
			// The runtime poller can wake us up spuriously;
			// see issues 14548 and 19289. Check that we are
			// really connected; if not, wait again.
			if rsa, err := syscall.Getpeername(c.fd); err == nil {
				return rsa, nil
			}
		default:
			return nil, os.NewSyscallError("connect", err)
		}
	}
}

// Various errors contained in OpError.
var (
	errMissingAddress = errors.New("missing address")
	errCanceled       = errors.New("operation was canceled")
	errIOTimeout      = errors.New("i/o timeout")
)

// mapErr maps from the context errors to the historical internal net
// error values. Any other error is returned unchanged.
//
// TODO(bradfitz): get rid of this after adjusting tests and making
// context.DeadlineExceeded implement net.Error?
func mapErr(err error) error {
	switch err {
	case context.Canceled:
		return errCanceled
	case context.DeadlineExceeded:
		return errIOTimeout
	default:
		return err
	}
}

================================================
FILE: net_netfd_conn.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux // +build darwin netbsd freebsd openbsd dragonfly linux package netpoll import ( "net" "strings" "sync/atomic" "syscall" "time" ) var _ Conn = &netFD{} // Fd implements Conn. func (c *netFD) Fd() (fd int) { return c.fd } // Read implements Conn. func (c *netFD) Read(b []byte) (n int, err error) { n, err = syscall.Read(c.fd, b) if err != nil { if err == syscall.EAGAIN || err == syscall.EINTR { return 0, nil } } return n, err } // Write implements Conn. func (c *netFD) Write(b []byte) (n int, err error) { n, err = syscall.Write(c.fd, b) if err != nil { if err == syscall.EAGAIN { return 0, nil } } return n, err } // Close will be executed only once. func (c *netFD) Close() (err error) { if atomic.AddUint32(&c.closed, 1) != 1 { return nil } if !c.detaching && c.fd > 2 { err = syscall.Close(c.fd) if err != nil { logger.Printf("NETPOLL: netFD[%d] close error: %s", c.fd, err.Error()) } } return err } // LocalAddr implements Conn. func (c *netFD) LocalAddr() (addr net.Addr) { return c.localAddr } // RemoteAddr implements Conn. func (c *netFD) RemoteAddr() (addr net.Addr) { return c.remoteAddr } // SetKeepAlive implements Conn. // TODO: only tcp conn is ok. func (c *netFD) SetKeepAlive(second int) error { if !strings.HasPrefix(c.network, "tcp") { return nil } if second > 0 { return SetKeepAlive(c.fd, second) } return nil } // SetDeadline implements Conn. func (c *netFD) SetDeadline(t time.Time) error { return Exception(ErrUnsupported, "SetDeadline") } // SetReadDeadline implements Conn. func (c *netFD) SetReadDeadline(t time.Time) error { return Exception(ErrUnsupported, "SetReadDeadline") } // SetWriteDeadline implements Conn. 
func (c *netFD) SetWriteDeadline(t time.Time) error {
	return Exception(ErrUnsupported, "SetWriteDeadline")
}

================================================
FILE: net_polldesc.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"context"
)

// newPollDesc allocates an FDOperator for fd from a poller picked by
// pollmanager and wires its OnWrite and OnHup callbacks to the new pollDesc.
// The operator is not registered with the poller here; WaitWrite does that.
func newPollDesc(fd int) *pollDesc {
	pd := &pollDesc{}
	poll := pollmanager.Pick()
	pd.operator = poll.Alloc()
	pd.operator.poll = poll
	pd.operator.FD = fd
	pd.operator.OnWrite = pd.onwrite
	pd.operator.OnHup = pd.onhup
	pd.writeTrigger = make(chan struct{})
	pd.closeTrigger = make(chan struct{})
	return pd
}

// pollDesc lets a goroutine wait until an fd becomes writable or is hung up.
type pollDesc struct {
	operator *FDOperator
	// The write event is OneShot, then mark the writable to skip duplicate calling.
	// writeTrigger is closed by onwrite, closeTrigger by onhup.
	writeTrigger chan struct{}
	closeTrigger chan struct{}
}

// WaitWrite registers the operator for write events if it is unused and then
// blocks until the fd is writable, the peer hangs up, or ctx is done.
// It returns nil on writable, an ErrConnClosed exception on hang-up, and the
// mapped context error when ctx ends first.
func (pd *pollDesc) WaitWrite(ctx context.Context) (err error) {
	if pd.operator.isUnused() {
		// add ET|Write|Hup
		if err = pd.operator.Control(PollWritable); err != nil {
			logger.Printf("NETPOLL: pollDesc register operator failed: %v", err)
			return err
		}
	}

	select {
	case <-pd.writeTrigger:
		// triggered by poller
	case <-pd.closeTrigger:
		// triggered by poller
		// no need to detach, since poller has done it in OnHup.
		return Exception(ErrConnClosed, "by peer")
	case <-ctx.Done():
		// triggered by ctx
		// deregister from poller, upper caller function will close fd
		pd.detach()
		return mapErr(ctx.Err())
	}
	// double check close trigger
	select {
	case <-pd.closeTrigger:
		return Exception(ErrConnClosed, "by peer")
	default:
		return nil
	}
}

// onwrite is the operator's OnWrite callback. On the first call it detaches
// the operator and closes writeTrigger; later calls do nothing.
func (pd *pollDesc) onwrite(p Poll) error {
	select {
	case <-pd.writeTrigger:
	default:
		pd.detach()
		close(pd.writeTrigger)
	}
	return nil
}

// onhup is the operator's OnHup callback. It closes closeTrigger on the
// first call; later calls do nothing.
func (pd *pollDesc) onhup(p Poll) error {
	select {
	case <-pd.closeTrigger:
	default:
		close(pd.closeTrigger)
	}
	return nil
}

// detach removes the operator from its poller; a failure is only logged.
func (pd *pollDesc) detach() {
	if err := pd.operator.Control(PollDetach); err != nil {
		logger.Printf("NETPOLL: pollDesc detach operator failed: %v", err)
	}
}

================================================
FILE: net_polldesc_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !windows // +build !windows package netpoll import ( "testing" "time" ) func TestZeroTimer(t *testing.T) { MustTrue(t, noDeadline.IsZero()) } func TestRuntimePoll(t *testing.T) { address := getTestAddress() ln, err := CreateListener("tcp", address) MustNil(t, err) stop := make(chan int, 1) defer close(stop) go func() { for { select { case <-stop: err := ln.Close() MustNil(t, err) return default: } conn, err := ln.Accept() if conn == nil && err == nil { continue } } }() for i := 0; i < 10; i++ { conn, err := DialConnection("tcp", address, time.Second) MustNil(t, err) conn.Close() } } ================================================ FILE: net_sock.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // // This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”). // All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors. //go:build !windows // +build !windows package netpoll import ( "context" "net" "runtime" "syscall" ) // A sockaddr represents a TCP, IP or Unix network endpoint // address that can be converted into a syscall.Sockaddr. type sockaddr interface { net.Addr // family returns the platform-dependent address family // identifier. family() int // isWildcard reports whether the address is a wildcard // address. isWildcard() bool // sockaddr returns the address converted into a syscall // sockaddr type that implements syscall.Sockaddr // interface. It returns a nil interface when the address is nil. 
sockaddr(family int) (syscall.Sockaddr, error) // toLocal maps the zero address to a local system address (127.0.0.1 or ::1) toLocal(net string) sockaddr } func internetSocket(ctx context.Context, net string, laddr, raddr sockaddr, sotype, proto int, mode string) (conn *netFD, err error) { if (runtime.GOOS == "aix" || runtime.GOOS == "openbsd" || runtime.GOOS == "nacl") && raddr.isWildcard() { raddr = raddr.toLocal(net) } family, ipv6only := favoriteAddrFamily(net, laddr, raddr) return socket(ctx, net, family, sotype, proto, ipv6only, laddr, raddr) } // favoriteAddrFamily returns the appropriate address family for the // given network, laddr, raddr and mode. // // If mode indicates "listen" and laddr is a wildcard, we assume that // the user wants to make a passive-open connection with a wildcard // address family, both AF_INET and AF_INET6, and a wildcard address // like the following: // // - A listen for a wildcard communication domain, "tcp", // with a wildcard address: If the platform supports // both IPv6 and IPv4-mapped IPv6 communication capabilities, // or does not support IPv4, we use a dual stack, AF_INET6 and // IPV6_V6ONLY=0, wildcard address listen. The dual stack // wildcard address listen may fall back to an IPv6-only, // AF_INET6 and IPV6_V6ONLY=1, wildcard address listen. // Otherwise we prefer an IPv4-only, AF_INET, wildcard address // listen. // // - A listen for a wildcard communication domain, "tcp", // with an IPv4 wildcard address: same as above. // // - A listen for a wildcard communication domain, "tcp", // with an IPv6 wildcard address: same as above. // // - A listen for an IPv4 communication domain, "tcp4", // with an IPv4 wildcard address: We use an IPv4-only, AF_INET, // wildcard address listen. // // - A listen for an IPv6 communication domain, "tcp6", // with an IPv6 wildcard address: We use an IPv6-only, AF_INET6 // and IPV6_V6ONLY=1, wildcard address listen. 
// // Otherwise guess: If the addresses are IPv4 then returns AF_INET, // or else returns AF_INET6. It also returns a boolean value what // designates IPV6_V6ONLY option. // // Note that the latest DragonFly BSD and OpenBSD kernels allow // neither "net.inet6.ip6.v6only=1" change nor IPPROTO_IPV6 level // IPV6_V6ONLY socket option setting. func favoriteAddrFamily(network string, laddr, raddr sockaddr) (family int, ipv6only bool) { switch network[len(network)-1] { case '4': return syscall.AF_INET, false case '6': return syscall.AF_INET6, true } if (laddr == nil || laddr.family() == syscall.AF_INET) && (raddr == nil || raddr.family() == syscall.AF_INET) { return syscall.AF_INET, false } return syscall.AF_INET6, false } // socket returns a network file descriptor that is ready for // asynchronous I/O using the network poller. func socket(ctx context.Context, net string, family, sotype, proto int, ipv6only bool, laddr, raddr sockaddr) (netfd *netFD, err error) { // syscall.Socket & set socket options var fd int fd, err = sysSocket(family, sotype, proto) if err != nil { return nil, err } err = setDefaultSockopts(fd, family, sotype, ipv6only) if err != nil { syscall.Close(fd) return nil, err } netfd = newNetFD(fd, family, sotype, net) err = netfd.dial(ctx, laddr, raddr) if err != nil { netfd.Close() return nil, err } return netfd, nil } // sockaddrToAddr returns a go/net friendly address func sockaddrToAddr(sa syscall.Sockaddr) net.Addr { var a net.Addr switch sa := sa.(type) { case *syscall.SockaddrInet4: a = &net.TCPAddr{ IP: sa.Addr[0:], Port: sa.Port, } case *syscall.SockaddrInet6: var zone string if sa.ZoneId != 0 { if ifi, err := net.InterfaceByIndex(int(sa.ZoneId)); err == nil { zone = ifi.Name } } // if zone == "" && sa.ZoneId != 0 { // } a = &net.TCPAddr{ IP: sa.Addr[0:], Port: sa.Port, Zone: zone, } case *syscall.SockaddrUnix: a = &net.UnixAddr{Net: "unix", Name: sa.Name} } return a } ================================================ FILE: net_tcpsock.go 
================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // // This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”). // All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors. //go:build !windows // +build !windows package netpoll import ( "context" "net" "os" "syscall" ) // TCPAddr represents the address of a TCP end point. type TCPAddr struct { net.TCPAddr } func (a *TCPAddr) isWildcard() bool { if a == nil || a.IP == nil { return true } return a.IP.IsUnspecified() } func (a *TCPAddr) opAddr() net.Addr { if a == nil { return nil } return a } func (a *TCPAddr) family() int { if a == nil || len(a.IP) <= net.IPv4len { return syscall.AF_INET } if a.IP.To4() != nil { return syscall.AF_INET } return syscall.AF_INET6 } func (a *TCPAddr) sockaddr(family int) (syscall.Sockaddr, error) { if a == nil { return nil, nil } return ipToSockaddr(family, a.IP, a.Port, a.Zone) } func (a *TCPAddr) toLocal(network string) sockaddr { addr := &TCPAddr{} addr.IP = loopbackIP(network) addr.Port = a.Port addr.Zone = a.Zone return addr } func loopbackIP(network string) net.IP { if network != "" && network[len(network)-1] == '6' { return net.IPv6loopback } return net.IP{127, 0, 0, 1} } func ipToSockaddr(family int, ip net.IP, port int, zone string) (syscall.Sockaddr, error) { switch family { case syscall.AF_INET: if len(ip) == 0 { ip = net.IPv4zero } ip4 := ip.To4() if ip4 == nil { return nil, &net.AddrError{Err: "non-IPv4 address", Addr: ip.String()} } sa := &syscall.SockaddrInet4{Port: port} copy(sa.Addr[:], ip4) return sa, nil case syscall.AF_INET6: // In general, an IP wildcard address, which is either // "0.0.0.0" or "::", means the entire IP addressing // space. For some historical reason, it is used to // specify "any available address" on some operations // of IP node. 
// // When the IP node supports IPv4-mapped IPv6 address, // we allow an listener to listen to the wildcard // address of both IP addressing spaces by specifying // IPv6 wildcard address. if len(ip) == 0 || ip.Equal(net.IPv4zero) { ip = net.IPv6zero } // We accept any IPv6 address including IPv4-mapped // IPv6 address. ip6 := ip.To16() if ip6 == nil { return nil, &net.AddrError{Err: "non-IPv6 address", Addr: ip.String()} } // TODO: sa := &syscall.SockaddrInet6{Port: port, ZoneId: uint32(zoneCache.index(zone))} sa := &syscall.SockaddrInet6{Port: port} copy(sa.Addr[:], ip6) return sa, nil } return nil, &net.AddrError{Err: "invalid address family", Addr: ip.String()} } // ResolveTCPAddr returns an address of TCP end point. // // The network must be a TCP network name. // // If the host in the address parameter is not a literal IP address or // the port is not a literal port number, ResolveTCPAddr resolves the // address to an address of TCP end point. // Otherwise, it parses the address as a pair of literal IP address // and port number. // The address parameter can use a host name, but this is not // recommended, because it will return at most one of the host name's // IP addresses. // // See func Dial for a description of the network and address // parameters. func ResolveTCPAddr(network, address string) (*TCPAddr, error) { addr, err := net.ResolveTCPAddr(network, address) if err != nil { return nil, err } return &TCPAddr{*addr}, nil } // TCPConnection implements Connection. type TCPConnection struct { connection } // newTCPConnection wraps *TCPConnection. func newTCPConnection(conn Conn) (connection *TCPConnection, err error) { connection = &TCPConnection{} err = connection.init(conn, nil) if err != nil { return nil, err } return connection, nil } // DialTCP acts like Dial for TCP networks. // // The network must be a TCP network name; see func Dial for details. // // If laddr is nil, a local address is automatically chosen. 
// If the IP field of raddr is nil or an unspecified IP address, the // local system is assumed. func DialTCP(ctx context.Context, network string, laddr, raddr *TCPAddr) (*TCPConnection, error) { switch network { case "tcp", "tcp4", "tcp6": default: return nil, &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: raddr.opAddr(), Err: net.UnknownNetworkError(network)} } if raddr == nil { return nil, &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: nil, Err: errMissingAddress} } if ctx == nil { ctx = context.Background() } sd := &sysDialer{network: network, address: raddr.String()} c, err := sd.dialTCP(ctx, laddr, raddr) if err != nil { return nil, &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: raddr.opAddr(), Err: err} } return c, nil } func (sd *sysDialer) dialTCP(ctx context.Context, laddr, raddr *TCPAddr) (*TCPConnection, error) { conn, err := internetSocket(ctx, sd.network, laddr, raddr, syscall.SOCK_STREAM, 0, "dial") // TCP has a rarely used mechanism called a 'simultaneous connection' in // which Dial("tcp", addr1, addr2) run on the machine at addr1 can // connect to a simultaneous Dial("tcp", addr2, addr1) run on the machine // at addr2, without either machine executing Listen. If laddr == nil, // it means we want the kernel to pick an appropriate originating local // address. Some Linux kernels cycle blindly through a fixed range of // local ports, regardless of destination port. If a kernel happens to // pick local port 50001 as the source for a Dial("tcp", "", "localhost:50001"), // then the Dial will succeed, having simultaneously connected to itself. // This can only happen when we are letting the kernel pick a port (laddr == nil) // and when there is no listener for the destination address. // It's hard to argue this is anything other than a kernel bug. If we // see this happen, rather than expose the buggy effect to users, we // close the conn and try again. 
If it happens twice more, we relent and // use the result. See also: // https://golang.org/issue/2690 // https://stackoverflow.com/questions/4949858/ // // The opposite can also happen: if we ask the kernel to pick an appropriate // originating local address, sometimes it picks one that is already in use. // So if the error is EADDRNOTAVAIL, we have to try again too, just for // a different reason. // // The kernel socket code is no doubt enjoying watching us squirm. for i := 0; i < 2 && (laddr == nil || laddr.Port == 0) && (selfConnect(conn, err) || spuriousENOTAVAIL(err)); i++ { if err == nil { conn.Close() } conn, err = internetSocket(ctx, sd.network, laddr, raddr, syscall.SOCK_STREAM, 0, "dial") } if err != nil { return nil, err } return newTCPConnection(conn) } func selfConnect(conn *netFD, err error) bool { // If the connect failed, we clearly didn't connect to ourselves. if err != nil { return false } // The socket constructor can return an conn with raddr nil under certain // unknown conditions. The errors in the calls there to Getpeername // are discarded, but we can't catch the problem there because those // calls are sometimes legally erroneous with a "socket not connected". // Since this code (selfConnect) is already trying to work around // a problem, we make sure if this happens we recognize trouble and // ask the DialTCP routine to try again. // TODO: try to understand what's really going on. if conn.localAddr == nil || conn.remoteAddr == nil { return true } l := conn.localAddr.(*net.TCPAddr) r := conn.remoteAddr.(*net.TCPAddr) return l.Port == r.Port && l.IP.Equal(r.IP) } func spuriousENOTAVAIL(err error) bool { if op, ok := err.(*net.OpError); ok { err = op.Err } if sys, ok := err.(*os.SyscallError); ok { err = sys.Err } return err == syscall.EADDRNOTAVAIL } ================================================ FILE: net_unixsock.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // // This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”). // All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors. //go:build !windows // +build !windows package netpoll import ( "context" "errors" "net" "syscall" ) // BUG(mikio): On JS, NaCl and Plan 9, methods and functions related // to UnixConn and UnixListener are not implemented. // BUG(mikio): On Windows, methods and functions related to UnixConn // and UnixListener don't work for "unixgram" and "unixpacket". // UnixAddr represents the address of a Unix domain socket end point. type UnixAddr struct { net.UnixAddr } func (a *UnixAddr) isWildcard() bool { return a == nil || a.Name == "" } func (a *UnixAddr) opAddr() net.Addr { if a == nil { return nil } return a } func (a *UnixAddr) family() int { return syscall.AF_UNIX } func (a *UnixAddr) sockaddr(family int) (syscall.Sockaddr, error) { if a == nil { return nil, nil } return &syscall.SockaddrUnix{Name: a.Name}, nil } func (a *UnixAddr) toLocal(net string) sockaddr { return a } // ResolveUnixAddr returns an address of Unix domain socket end point. // // The network must be a Unix network name. // // See func Dial for a description of the network and address // parameters. func ResolveUnixAddr(network, address string) (*UnixAddr, error) { addr, err := net.ResolveUnixAddr(network, address) if err != nil { return nil, err } return &UnixAddr{*addr}, nil } // UnixConnection implements Connection. type UnixConnection struct { connection } // newUnixConnection wraps UnixConnection. func newUnixConnection(conn Conn) (connection *UnixConnection, err error) { connection = &UnixConnection{} err = connection.init(conn, nil) if err != nil { return nil, err } return connection, nil } // DialUnix acts like Dial for Unix networks. // // The network must be a Unix network name; see func Dial for details. 
// // If laddr is non-nil, it is used as the local address for the // connection. func DialUnix(network string, laddr, raddr *UnixAddr) (*UnixConnection, error) { switch network { case "unix", "unixgram", "unixpacket": default: return nil, &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: raddr.opAddr(), Err: net.UnknownNetworkError(network)} } sd := &sysDialer{network: network, address: raddr.String()} c, err := sd.dialUnix(context.Background(), laddr, raddr) if err != nil { return nil, &net.OpError{Op: "dial", Net: network, Source: laddr.opAddr(), Addr: raddr.opAddr(), Err: err} } return c, nil } func (sd *sysDialer) dialUnix(ctx context.Context, laddr, raddr *UnixAddr) (*UnixConnection, error) { conn, err := unixSocket(ctx, sd.network, laddr, raddr, "dial") if err != nil { return nil, err } return newUnixConnection(conn) } func unixSocket(ctx context.Context, network string, laddr, raddr sockaddr, mode string) (conn *netFD, err error) { var sotype int switch network { case "unix": sotype = syscall.SOCK_STREAM case "unixgram": sotype = syscall.SOCK_DGRAM case "unixpacket": sotype = syscall.SOCK_SEQPACKET default: return nil, net.UnknownNetworkError(network) } switch mode { case "dial": if laddr != nil && laddr.isWildcard() { laddr = nil } if raddr != nil && raddr.isWildcard() { raddr = nil } if raddr == nil && (sotype != syscall.SOCK_DGRAM || laddr == nil) { return nil, errMissingAddress } case "listen": default: return nil, errors.New("unknown mode: " + mode) } return socket(ctx, network, syscall.AF_UNIX, sotype, 0, false, laddr, raddr) } ================================================ FILE: netpoll_config.go ================================================ // Copyright 2024 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import (
	"context"
	"io"
)

// global config
var (
	// defaultLinkBufferSize starts out as pagesize.
	defaultLinkBufferSize = pagesize
)

// Config exposes some tuning parameters to control the internal behaviors of netpoll.
// Every parameter left at its zero value should keep the default behavior of netpoll.
type Config struct {
	PollerNum    int                                 // number of pollers
	BufferSize   int                                 // default size of a new connection's LinkBuffer
	Runner       func(ctx context.Context, f func()) // runner for event handlers; most of the time a goroutine pool
	LoggerOutput io.Writer                           // logger output
	LoadBalance  LoadBalance                         // load balance for poller picker

	Feature // defines all features that are not enabled by default
}

// Feature exposes new features that may be promoted to default behavior but are not yet.
type Feature struct {
	// Deprecated: AlwaysNoCopyRead has no effect and will be removed in a future release.
	AlwaysNoCopyRead bool
}

================================================
FILE: netpoll_options.go
================================================
// Copyright 2024 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and // limitations under the License. package netpoll import "time" // Option . type Option struct { f func(*options) } type options struct { onPrepare OnPrepare onConnect OnConnect onDisconnect OnDisconnect onRequest OnRequest readTimeout time.Duration writeTimeout time.Duration idleTimeout time.Duration } // WithOnPrepare registers the OnPrepare method to EventLoop. func WithOnPrepare(onPrepare OnPrepare) Option { return Option{func(op *options) { op.onPrepare = onPrepare }} } // WithOnConnect registers the OnConnect method to EventLoop. func WithOnConnect(onConnect OnConnect) Option { return Option{func(op *options) { op.onConnect = onConnect }} } // WithOnDisconnect registers the OnDisconnect method to EventLoop. func WithOnDisconnect(onDisconnect OnDisconnect) Option { return Option{func(op *options) { op.onDisconnect = onDisconnect }} } // WithReadTimeout sets the read timeout of connections. func WithReadTimeout(timeout time.Duration) Option { return Option{func(op *options) { op.readTimeout = timeout }} } // WithWriteTimeout sets the write timeout of connections. func WithWriteTimeout(timeout time.Duration) Option { return Option{func(op *options) { op.writeTimeout = timeout }} } // WithIdleTimeout sets the idle timeout of connections. func WithIdleTimeout(timeout time.Duration) Option { return Option{func(op *options) { op.idleTimeout = timeout }} } ================================================ FILE: netpoll_server.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !windows // +build !windows package netpoll import ( "context" "errors" "strings" "sync" "syscall" "time" ) // newServer wrap listener into server, quit will be invoked when server exit. func newServer(ln Listener, opts *options, onQuit func(err error)) *server { return &server{ ln: ln, opts: opts, onQuit: onQuit, } } type server struct { operator FDOperator ln Listener opts *options onQuit func(err error) connections sync.Map // key=fd, value=connection } // Run this server. func (s *server) Run() (err error) { s.operator = FDOperator{ FD: s.ln.Fd(), OnRead: s.OnRead, OnHup: s.OnHup, } s.operator.poll = pollmanager.Pick() err = s.operator.Control(PollReadable) if err != nil { s.onQuit(err) } return err } // Close this server with deadline. 
func (s *server) Close(ctx context.Context) error { s.operator.Control(PollDetach) s.ln.Close() for { activeConn := 0 s.connections.Range(func(key, value interface{}) bool { conn, ok := value.(gracefulExit) if !ok || conn.isIdle() { value.(Connection).Close() } else { activeConn++ } return true }) if activeConn == 0 { // all connections have been closed return nil } // smart control graceful shutdown check internal // we should wait for more time if there are more active connections waitTime := time.Millisecond * time.Duration(activeConn) if waitTime > time.Second { // max wait time is 1000 ms waitTime = time.Millisecond * 1000 } else if waitTime < time.Millisecond*50 { // min wait time is 50 ms waitTime = time.Millisecond * 50 } select { case <-ctx.Done(): return ctx.Err() case <-time.After(waitTime): continue } } } // OnRead implements FDOperator. func (s *server) OnRead(p Poll) error { // accept socket conn, err := s.ln.Accept() if err == nil { if conn != nil { s.onAccept(conn.(Conn)) } // EAGAIN | EWOULDBLOCK if conn and err both nil return nil } logger.Printf("NETPOLL: accept conn failed: %v", err) // delay accept when too many open files if isOutOfFdErr(err) { // since we use Epoll LT, we have to detach listener fd from epoll first // and re-register it when accept successfully or there is no available connection cerr := s.operator.Control(PollDetach) if cerr != nil { logger.Printf("NETPOLL: detach listener fd failed: %v", cerr) return err } go func() { retryTimes := []time.Duration{0, 10, 50, 100, 200, 500, 1000} // ms retryTimeIndex := 0 for { if retryTimeIndex > 0 { time.Sleep(retryTimes[retryTimeIndex] * time.Millisecond) } conn, err := s.ln.Accept() if err == nil { if conn == nil { // recovery accept poll loop s.operator.Control(PollReadable) return } s.onAccept(conn.(Conn)) logger.Println("NETPOLL: re-accept conn success:", conn.RemoteAddr()) retryTimeIndex = 0 continue } if retryTimeIndex+1 < len(retryTimes) { retryTimeIndex++ } logger.Printf("NETPOLL: 
re-accept conn failed, err=[%s] and next retrytime=%dms", err.Error(), retryTimes[retryTimeIndex]) } }() } // shut down if strings.Contains(err.Error(), "closed") { s.operator.Control(PollDetach) s.onQuit(err) return err } return err } // OnHup implements FDOperator. func (s *server) OnHup(p Poll) error { s.onQuit(errors.New("listener close")) return nil } func (s *server) onAccept(conn Conn) { // store & register connection nconn := new(connection) nconn.init(conn, s.opts) if !nconn.IsActive() { return } fd := conn.Fd() nconn.AddCloseCallback(func(connection Connection) error { s.connections.Delete(fd) return nil }) s.connections.Store(fd, nconn) // trigger onConnect asynchronously nconn.onConnect() } func isOutOfFdErr(err error) bool { se, ok := err.(syscall.Errno) return ok && (se == syscall.EMFILE || se == syscall.ENFILE) } ================================================ FILE: netpoll_unix.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux // +build darwin netbsd freebsd openbsd dragonfly linux package netpoll import ( "context" "io" "log" "net" "os" "runtime" "sync" "github.com/cloudwego/netpoll/internal/runner" ) var ( pollmanager = newManager(runtime.GOMAXPROCS(0)/20 + 1) // pollmanager manage all pollers logger = log.New(os.Stderr, "", log.LstdFlags) ) // Initialize the pollers actively. 
// By default, it's lazy initialized.
// It's safe to call it multiple times.
func Initialize() {
	// The first call of Pick() will init pollers
	_ = pollmanager.Pick()
}

// Configure the internal behaviors of netpoll.
// Configure must be called in an init() function, because the poller will read some global variables after init() has finished.
//
// Only fields that are set are applied: PollerNum and BufferSize when > 0,
// Runner and LoggerOutput when non-nil, and LoadBalance when >= 0.
// The first error from the poll manager is returned and aborts the remaining steps.
func Configure(config Config) (err error) {
	if config.PollerNum > 0 {
		if err = pollmanager.SetNumLoops(config.PollerNum); err != nil {
			return err
		}
	}
	if config.BufferSize > 0 {
		defaultLinkBufferSize = config.BufferSize
	}

	if config.Runner != nil {
		runner.RunTask = config.Runner
	}
	if config.LoggerOutput != nil {
		logger = log.New(config.LoggerOutput, "", log.LstdFlags)
	}
	if config.LoadBalance >= 0 {
		if err = pollmanager.SetLoadBalance(config.LoadBalance); err != nil {
			return err
		}
	}
	return nil
}

// SetNumLoops is used to set the number of pollers, generally do not need to actively set.
// By default, the number of pollers is equal to runtime.GOMAXPROCS(0)/20+1.
// If the number of cores in your service process is less than 20c, theoretically only one poller is needed.
// Otherwise, you may need to adjust the number of pollers to achieve the best results.
// Experience recommends assigning a poller every 20c.
//
// You can only use SetNumLoops before any connection is created. An example usage:
//
//	func init() {
//	    netpoll.SetNumLoops(...)
//	}
//
// Deprecated: use Configure instead.
func SetNumLoops(numLoops int) error {
	return pollmanager.SetNumLoops(numLoops)
}

// SetLoadBalance sets the load balancing method. Load balancing is always a best effort to attempt
// to distribute the incoming connections between multiple polls.
// This option only works when numLoops is set.
//
// Deprecated: use Configure instead.
func SetLoadBalance(lb LoadBalance) error {
	return pollmanager.SetLoadBalance(lb)
}

// SetLoggerOutput sets the logger output target.
//
// Deprecated: use Configure instead.
func SetLoggerOutput(w io.Writer) {
	logger = log.New(w, "", log.LstdFlags)
}

// SetRunner sets the runner function for every OnRequest/OnConnect callback.
//
// Deprecated: use Configure and specify config.Runner instead.
func SetRunner(f func(ctx context.Context, f func())) {
	runner.RunTask = f
}

// DisableGopool will remove gopool(the goroutine pool used to run OnRequest),
// which means that OnRequest will be run via `go OnRequest(...)`.
// Usually, OnRequest will cause stack expansion, which can be solved by reusing goroutine.
// But if you can confirm that the OnRequest will not cause stack expansion,
// it is recommended to use DisableGopool to reduce redundancy and improve performance.
//
// Deprecated: use Configure() and specify config.Runner instead.
func DisableGopool() error {
	runner.UseGoRunTask()
	return nil
}

// NewEventLoop creates an EventLoop that uses onRequest as its request
// handler and applies every given Option. The returned error is always nil.
func NewEventLoop(onRequest OnRequest, ops ...Option) (EventLoop, error) {
	opts := &options{
		onRequest: onRequest,
	}
	for _, do := range ops {
		do.f(opts)
	}
	return &eventLoop{
		opts: opts,
		stop: make(chan error, 1),
	}, nil
}

// eventLoop holds the options, the running server (nil when not serving or
// after Shutdown), and the channel on which a quit signal is delivered.
type eventLoop struct {
	sync.Mutex
	opts *options
	svr  *server
	stop chan error
}

// Serve implements EventLoop.
// It converts ln, starts a server on it, and blocks until a quit signal
// arrives, returning the error carried by that signal.
func (evl *eventLoop) Serve(ln net.Listener) error {
	npln, err := ConvertListener(ln)
	if err != nil {
		return err
	}
	evl.Lock()
	evl.svr = newServer(npln, evl.opts, evl.quit)
	evl.svr.Run()
	evl.Unlock()

	err = evl.waitQuit()
	// ensure evl will not be finalized until Serve returns
	runtime.SetFinalizer(evl, nil)
	return err
}

// Shutdown signals a shutdown and begins server closing.
func (evl *eventLoop) Shutdown(ctx context.Context) error { evl.Lock() svr := evl.svr evl.svr = nil evl.Unlock() if svr == nil { return nil } evl.quit(nil) return svr.Close(ctx) } // waitQuit waits for a quit signal func (evl *eventLoop) waitQuit() error { return <-evl.stop } func (evl *eventLoop) quit(err error) { select { case evl.stop <- err: default: } } ================================================ FILE: netpoll_unix_test.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !windows // +build !windows package netpoll import ( "context" "errors" "fmt" "os" "runtime" "sync" "sync/atomic" "syscall" "testing" "time" "github.com/cloudwego/netpoll/internal/runner" ) func MustNil(t *testing.T, val interface{}) { t.Helper() Assert(t, val == nil, val) if val != nil { t.Fatal("assertion nil failed, val=", val) } } func MustTrue(t *testing.T, cond bool) { t.Helper() if !cond { t.Fatal("assertion true failed.") } } func Equal(t *testing.T, got, expect interface{}) { t.Helper() if got != expect { t.Fatalf("assertion equal failed, got=[%v], expect=[%v]", got, expect) } } func Assert(t *testing.T, cond bool, val ...interface{}) { t.Helper() if !cond { if len(val) > 0 { val = append([]interface{}{"assertion failed:"}, val...) t.Fatal(val...) 
} else { t.Fatal("assertion failed") } } } var testPort int32 = 10000 // getTestAddress return a unique port for every tests, so all tests will not share a same listener func getTestAddress() string { return fmt.Sprintf("127.0.0.1:%d", atomic.AddInt32(&testPort, 1)) } func TestEqual(t *testing.T) { var err error MustNil(t, err) MustTrue(t, err == nil) Equal(t, err, nil) Assert(t, err == nil, err) } func TestOnConnect(t *testing.T) { network, address := "tcp", getTestAddress() req, resp := "ping", "pong" loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { return nil }, WithOnConnect(func(ctx context.Context, conn Connection) context.Context { for { input, err := conn.Reader().Next(len(req)) if errors.Is(err, ErrEOF) || errors.Is(err, ErrConnClosed) { return ctx } MustNil(t, err) Equal(t, string(input), req) _, err = conn.Writer().WriteString(resp) MustNil(t, err) err = conn.Writer().Flush() MustNil(t, err) } }), ) conn, err := DialConnection(network, address, time.Second) MustNil(t, err) for i := 0; i < 1024; i++ { _, err = conn.Writer().WriteString(req) MustNil(t, err) err = conn.Writer().Flush() MustNil(t, err) input, err := conn.Reader().Next(len(resp)) MustNil(t, err) Equal(t, string(input), resp) } err = conn.Close() MustNil(t, err) err = loop.Shutdown(context.Background()) MustNil(t, err) } func TestOnConnectWrite(t *testing.T) { network, address := "tcp", getTestAddress() loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { return nil }, WithOnConnect(func(ctx context.Context, connection Connection) context.Context { _, err := connection.Write([]byte("hello")) MustNil(t, err) return ctx }), ) conn, err := DialConnection(network, address, time.Second) MustNil(t, err) s, err := conn.Reader().ReadString(5) MustNil(t, err) MustTrue(t, s == "hello") err = loop.Shutdown(context.Background()) MustNil(t, err) } func TestOnDisconnect(t *testing.T) { type ctxKey struct{} 
network, address := "tcp", getTestAddress() var canceled, closed int32 var conns int32 = 100 req := "ping" loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { cancelFunc, _ := ctx.Value(ctxKey{}).(context.CancelFunc) MustTrue(t, cancelFunc != nil) Assert(t, ctx.Done() != nil) buf, err := connection.Reader().Next(4) // should consumed all data MustNil(t, err) Equal(t, string(buf), req) select { case <-ctx.Done(): atomic.AddInt32(&canceled, 1) case <-time.After(time.Second): } return nil }, WithOnConnect(func(ctx context.Context, conn Connection) context.Context { conn.AddCloseCallback(func(connection Connection) error { atomic.AddInt32(&closed, 1) return nil }) ctx, cancel := context.WithCancel(ctx) return context.WithValue(ctx, ctxKey{}, cancel) }), WithOnDisconnect(func(ctx context.Context, conn Connection) { cancelFunc, _ := ctx.Value(ctxKey{}).(context.CancelFunc) MustTrue(t, cancelFunc != nil) cancelFunc() }), ) for i := int32(0); i < conns; i++ { conn, err := DialConnection(network, address, time.Second) MustNil(t, err) _, err = conn.Writer().WriteString(req) MustNil(t, err) err = conn.Writer().Flush() MustNil(t, err) err = conn.Close() MustNil(t, err) } for atomic.LoadInt32(&closed) < conns { t.Logf("closed: %d, canceled: %d", atomic.LoadInt32(&closed), atomic.LoadInt32(&canceled)) runtime.Gosched() } Equal(t, atomic.LoadInt32(&closed), conns) Equal(t, atomic.LoadInt32(&canceled), conns) err := loop.Shutdown(context.Background()) MustNil(t, err) } func TestOnDisconnectWhenOnConnect(t *testing.T) { type ctxPrepareKey struct{} type ctxConnectKey struct{} network, address := "tcp", getTestAddress() var conns int32 = 10 var wg sync.WaitGroup wg.Add(int(conns) * 3) loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { _, _ = connection.Reader().Next(connection.Reader().Len()) return nil }, WithOnPrepare(func(connection Connection) context.Context { defer wg.Done() var 
counter int32 return context.WithValue(context.Background(), ctxPrepareKey{}, &counter) }), WithOnConnect(func(ctx context.Context, conn Connection) context.Context { defer wg.Done() t.Logf("OnConnect: %v", conn.RemoteAddr()) time.Sleep(time.Millisecond * 10) // wait for closed called counter := ctx.Value(ctxPrepareKey{}).(*int32) ok := atomic.CompareAndSwapInt32(counter, 0, 1) Assert(t, ok) return context.WithValue(ctx, ctxConnectKey{}, "123") }), WithOnDisconnect(func(ctx context.Context, conn Connection) { defer wg.Done() t.Logf("OnDisconnect: %v", conn.RemoteAddr()) counter, _ := ctx.Value(ctxPrepareKey{}).(*int32) ok := atomic.CompareAndSwapInt32(counter, 1, 2) Assert(t, ok) v := ctx.Value(ctxConnectKey{}).(string) Equal(t, v, "123") }), ) for i := int32(0); i < conns; i++ { conn, err := DialConnection(network, address, time.Second) MustNil(t, err) err = conn.Close() t.Logf("Close: %v", conn.LocalAddr()) MustNil(t, err) } wg.Wait() err := loop.Shutdown(context.Background()) MustNil(t, err) } func TestGracefulExit(t *testing.T) { network, address := "tcp", getTestAddress() // exit without processing connections eventLoop1 := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { return nil }) _, err := DialConnection(network, address, time.Second) MustNil(t, err) err = eventLoop1.Shutdown(context.Background()) MustNil(t, err) // exit with processing connections trigger := make(chan struct{}) eventLoop2 := newTestEventLoop(network, address, func(ctx context.Context, conn Connection) error { <-trigger rd := conn.Reader() rd.Next(rd.Len()) // avoid dead loop return errors.New("done") }) for i := 0; i < 10; i++ { // connect success conn, err := DialConnection(network, address, time.Second) MustNil(t, err) _, err = conn.Write(make([]byte, 16)) MustNil(t, err) } // shutdown timeout ctx2, cancel2 := context.WithTimeout(context.Background(), time.Millisecond*100) defer cancel2() err = eventLoop2.Shutdown(ctx2) MustTrue(t, err != 
nil) Equal(t, err.Error(), ctx2.Err().Error()) // shutdown success close(trigger) err = eventLoop2.Shutdown(ctx2) MustTrue(t, err == nil) // exit with read connections size := 16 eventLoop3 := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { _, err := connection.Reader().Next(size) MustNil(t, err) return nil }) for i := 0; i < 10; i++ { conn, err := DialConnection(network, address, time.Second) MustNil(t, err) if i%2 == 0 { _, err := conn.Write(make([]byte, size)) MustNil(t, err) } } ctx3, cancel3 := context.WithTimeout(context.Background(), 5*time.Second) defer cancel3() err = eventLoop3.Shutdown(ctx3) MustNil(t, err) } func TestCloseCallbackWhenOnRequest(t *testing.T) { network, address := "tcp", getTestAddress() requested, closed := make(chan struct{}), make(chan struct{}) loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { _, err := connection.Reader().Next(connection.Reader().Len()) MustNil(t, err) err = connection.AddCloseCallback(func(connection Connection) error { closed <- struct{}{} return nil }) MustNil(t, err) requested <- struct{}{} return nil }, ) conn, err := DialConnection(network, address, time.Second) MustNil(t, err) _, err = conn.Writer().WriteString("hello") MustNil(t, err) err = conn.Writer().Flush() MustNil(t, err) <-requested err = conn.Close() MustNil(t, err) <-closed err = loop.Shutdown(context.Background()) MustNil(t, err) } func TestCloseCallbackWhenOnConnect(t *testing.T) { network, address := "tcp", getTestAddress() connected, closed := make(chan struct{}), make(chan struct{}) loop := newTestEventLoop(network, address, nil, WithOnConnect(func(ctx context.Context, connection Connection) context.Context { err := connection.AddCloseCallback(func(connection Connection) error { closed <- struct{}{} return nil }) MustNil(t, err) connected <- struct{}{} return ctx }), ) conn, err := DialConnection(network, address, time.Second) MustNil(t, err) err = 
conn.Close() MustNil(t, err) <-connected <-closed err = loop.Shutdown(context.Background()) MustNil(t, err) } func TestCloseConnWhenOnConnect(t *testing.T) { network, address := "tcp", "localhost:8888" conns := 10 var wg sync.WaitGroup wg.Add(conns) loop := newTestEventLoop(network, address, nil, WithOnConnect(func(ctx context.Context, connection Connection) context.Context { defer wg.Done() err := connection.Close() MustNil(t, err) return ctx }), ) for i := 0; i < conns; i++ { wg.Add(1) go func() { defer wg.Done() conn, err := DialConnection(network, address, time.Second) if err != nil { return } _, err = conn.Reader().Next(1) Assert(t, errors.Is(err, ErrEOF)) err = conn.Close() MustNil(t, err) }() } wg.Wait() err := loop.Shutdown(context.Background()) MustNil(t, err) } func TestServerReadAndClose(t *testing.T) { network, address := "tcp", getTestAddress() sendMsg := []byte("hello") loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { _, err := connection.Reader().Next(len(sendMsg)) MustNil(t, err) err = connection.Close() MustNil(t, err) return nil }, ) conn, err := DialConnection(network, address, time.Second) MustNil(t, err) _, err = conn.Writer().WriteBinary(sendMsg) MustNil(t, err) err = conn.Writer().Flush() MustNil(t, err) for conn.IsActive() { runtime.Gosched() // wait for poller close connection } _, err = conn.Writer().WriteBinary(sendMsg) Assert(t, errors.Is(err, ErrConnClosed), err) err = loop.Shutdown(context.Background()) MustNil(t, err) } func TestServerPanicAndClose(t *testing.T) { // use custom RunTask to ignore panic log runfunc := runner.RunTask defer func() { runner.RunTask = runfunc }() runner.RunTask = func(ctx context.Context, f func()) { go func() { defer func() { recover() }() f() }() } network, address := "tcp", getTestAddress() sendMsg := []byte("hello") var panicked int32 loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { _, err := 
connection.Reader().Next(len(sendMsg)) MustNil(t, err) atomic.StoreInt32(&panicked, 1) panic("test") }, ) conn, err := DialConnection(network, address, time.Second) MustNil(t, err) _, err = conn.Writer().WriteBinary(sendMsg) MustNil(t, err) err = conn.Writer().Flush() MustNil(t, err) for atomic.LoadInt32(&panicked) == 0 { runtime.Gosched() // wait for poller close connection } for conn.IsActive() { runtime.Gosched() // wait for poller close connection } err = loop.Shutdown(context.Background()) MustNil(t, err) } func TestClientWriteAndClose(t *testing.T) { var ( network, address = "tcp", getTestAddress() connnum = 10 packetsize, packetnum = 1000 * 5, 1 recvbytes int32 = 0 ) loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { buf, err := connection.Reader().Next(connection.Reader().Len()) if errors.Is(err, ErrConnClosed) { return err } MustNil(t, err) atomic.AddInt32(&recvbytes, int32(len(buf))) return nil }, ) var wg sync.WaitGroup for i := 0; i < connnum; i++ { wg.Add(1) go func() { defer wg.Done() conn, err := DialConnection(network, address, time.Second) MustNil(t, err) sendMsg := make([]byte, packetsize) for j := 0; j < packetnum; j++ { _, err = conn.Write(sendMsg) MustNil(t, err) } err = conn.Close() MustNil(t, err) }() } wg.Wait() exceptbytes := int32(packetsize * packetnum * connnum) for atomic.LoadInt32(&recvbytes) != exceptbytes { t.Logf("left %d bytes not received", exceptbytes-atomic.LoadInt32(&recvbytes)) runtime.Gosched() } err := loop.Shutdown(context.Background()) MustNil(t, err) } func TestServerAcceptWhenTooManyOpenFiles(t *testing.T) { if os.Getenv("N_LOCAL") == "" { t.Skip("Only test for debug purpose") return } var originalRlimit syscall.Rlimit err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &originalRlimit) MustNil(t, err) t.Logf("Original RLimit: %v", originalRlimit) rlimit := syscall.Rlimit{Cur: 32, Max: originalRlimit.Max} err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &rlimit) MustNil(t, err) 
err = syscall.Getrlimit(syscall.RLIMIT_NOFILE, &rlimit) MustNil(t, err) t.Logf("New RLimit: %v", rlimit) defer func() { // reset err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &originalRlimit) MustNil(t, err) }() network, address := "tcp", getTestAddress() var connected int32 loop := newTestEventLoop(network, address, func(ctx context.Context, connection Connection) error { buf, err := connection.Reader().Next(connection.Reader().Len()) connection.Writer().WriteBinary(buf) connection.Writer().Flush() return err }, WithOnConnect(func(ctx context.Context, connection Connection) context.Context { atomic.AddInt32(&connected, 1) t.Logf("Conn[%s] accepted", connection.RemoteAddr()) return ctx }), WithOnDisconnect(func(ctx context.Context, connection Connection) { t.Logf("Conn[%s] disconnected", connection.RemoteAddr()) }), ) time.Sleep(time.Millisecond * 10) // out of fds files := make([]*os.File, 0) for { f, err := os.Open("/dev/null") if err != nil { Assert(t, isOutOfFdErr(errors.Unwrap(err)), err) break } files = append(files, f) } go func() { time.Sleep(time.Second * 10) t.Logf("close all files") for _, f := range files { f.Close() } }() // we should use telnet manually connections := 1 for atomic.LoadInt32(&connected) < int32(connections) { t.Logf("connected=%d", atomic.LoadInt32(&connected)) time.Sleep(time.Second) } time.Sleep(time.Second * 10) err = loop.Shutdown(context.Background()) MustNil(t, err) } func createTestListener(network, address string) (Listener, error) { for { ln, err := CreateListener(network, address) if err == nil { return ln, nil } time.Sleep(time.Millisecond * 100) } } func newTestEventLoop(network, address string, onRequest OnRequest, opts ...Option) EventLoop { ln, err := createTestListener(network, address) if err != nil { panic(err) } elp, err := NewEventLoop(onRequest, opts...) 
if err != nil { panic(err) } go elp.Serve(ln) return elp } ================================================ FILE: netpoll_windows.go ================================================ // Copyright 2024 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build windows // +build windows // The following methods would not be used, but are intended to compile on Windows. package netpoll import ( "net" ) // Configure the internal behaviors of netpoll. func Configure(config Config) (err error) { return nil } // NewDialer only support TCP and unix socket now. func NewDialer() Dialer { return nil } // NewEventLoop . func NewEventLoop(onRequest OnRequest, ops ...Option) (EventLoop, error) { return nil, nil } // ConvertListener converts net.Listener to Listener func ConvertListener(l net.Listener) (nl Listener, err error) { return nil, nil } // CreateListener return a new Listener. func CreateListener(network, addr string) (l Listener, err error) { return nil, nil } ================================================ FILE: nocopy.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// Reader is a collection of operations for nocopy reads.
//
// For ease of use, it is recommended to implement Reader as a blocking interface,
// rather than simply fetching the buffer.
// For example, a call to Next(n) should block while fewer than n bytes are available, unless it times out.
// The return value is guaranteed to meet the requirements or an error will be returned.
type Reader interface {
	// Next returns a slice containing the next n bytes from the buffer,
	// advancing the buffer as if the bytes had been returned by Read.
	//
	// If there are fewer than n bytes in the buffer, Next blocks
	// until enough data is available or an error occurs (such as a wait timeout).
	//
	// The slice p is only valid until the next call to the Release method.
	// Next is not globally optimal, and the Skip, ReadString and ReadBinary methods
	// are recommended for specific scenarios.
	//
	// Return: len(p) must be n or 0, and p and error cannot be nil at the same time.
	Next(n int) (p []byte, err error)

	// Peek returns the next n bytes without advancing the reader.
	// Other behavior is the same as Next.
	Peek(n int) (buf []byte, err error)

	// Skip discards the next n bytes and advances the reader. It is
	// a faster alternative to Next when the skipped data is not needed.
	Skip(n int) (err error)

	// Until reads until the first occurrence of delim in the input,
	// returning a slice that ends with delim.
	// If Until encounters an error before finding a delimiter,
	// it returns all the data in the buffer and the error itself (often ErrEOF or ErrConnClosed).
	// Until returns err != nil only if line does not end in delim.
	Until(delim byte) (line []byte, err error)

	// ReadString is a faster implementation of Next when a string needs to be returned.
	// It replaces:
	//
	//  var p, err = Next(n)
	//  return string(p), err
	//
	ReadString(n int) (s string, err error)

	// ReadBinary is a faster implementation of Next when the caller needs
	// a copy of the data that is not shared with the underlying buffer.
	// It replaces:
	//
	//  var p, err = Next(n)
	//  var b = make([]byte, n)
	//  copy(b, p)
	//  return b, err
	//
	ReadBinary(n int) (p []byte, err error)

	// ReadByte is a faster implementation of Next when a single byte needs to be returned.
	// It replaces:
	//
	//  var p, err = Next(1)
	//  return p[0], err
	//
	ReadByte() (b byte, err error)

	// Slice returns a new Reader containing the next n bytes from this Reader.
	//
	// If you want to build a new Reader from the []byte returned by Next, Slice already does that,
	// and the operation is zero-copy. Besides, Slice would also Release this Reader.
	// The logic is similar to this pseudocode:
	//
	//  var p, err = this.Next(n)
	//  var reader = new Reader(p) // pseudocode
	//  this.Release()
	//  return reader, err
	//
	Slice(n int) (r Reader, err error)

	// Release frees the memory occupied by all slices that have been read. It must be called
	// explicitly once the previously read data is known to be no longer in use.
	// After Release, slices obtained from methods such as Next and Peek
	// refer to invalid memory and must not be used anymore.
	Release() (err error)

	// Len returns the total length of the readable data in the reader.
	Len() (length int)
}
// Writer is a collection of operations for nocopy writes.
//
// The design is a two-step operation: first request a section of memory,
// then fill it and submit it. E.g:
//
//	var buf, _ = Malloc(n)
//	buf = append(buf[:0], ...)
//	Flush()
//
// Note that it is not recommended to submit self-managed buffers to Writer.
// Since the writer is processed asynchronously, a self-managed buffer that is recycled after submission
// may lead to inconsistent life cycle problems. This case is outside the scope of the design.
type Writer interface {
	// Malloc returns a slice containing the next n bytes from the buffer,
	// which will be written after submission (e.g. Flush).
	//
	// The slice p is only valid until the next submission (e.g. Flush).
	// Therefore, make sure that all data has been written into the slice before submitting.
	Malloc(n int) (buf []byte, err error)

	// WriteString is a faster implementation of Malloc when a string needs to be written.
	// It replaces:
	//
	//  var buf, err = Malloc(len(s))
	//  n = copy(buf, s)
	//  return n, err
	//
	// The argument string s will be referenced by its original address and will not be copied,
	// so make sure that the string s will not be changed.
	WriteString(s string) (n int, err error)

	// WriteBinary is a faster implementation of Malloc when a slice needs to be written.
	// It replaces:
	//
	//  var buf, err = Malloc(len(b))
	//  n = copy(buf, b)
	//  return n, err
	//
	// The argument slice b will be referenced by its original address and will not be copied,
	// so make sure that the slice b will not be changed.
	WriteBinary(b []byte) (n int, err error)

	// WriteByte is a faster implementation of Malloc when a single byte needs to be written.
	// It replaces:
	//
	//  var buf, _ = Malloc(1)
	//  buf[0] = b
	//
	WriteByte(b byte) (err error)

	// WriteDirect inserts an additional slice of data into the current write stream.
	// For example, if you plan to execute:
	//
	//  var bufA, _ = Malloc(nA)
	//  WriteBinary(b)
	//  var bufB, _ = Malloc(nB)
	//
	// It can be replaced by:
	//
	//  var buf, _ = Malloc(nA+nB)
	//  WriteDirect(b, nB)
	//
	// where buf[:nA] = bufA, buf[nA:nA+nB] = bufB.
	WriteDirect(p []byte, remainCap int) error

	// MallocAck keeps the first n malloc bytes and discards the rest.
	// The following:
	//
	//  var buf, _ = Malloc(8)
	//  buf = buf[:5]
	//  MallocAck(5)
	//
	// is equivalent to:
	//  var buf, _ = Malloc(5)
	//
	MallocAck(n int) (err error)

	// Append attaches the argument writer to the tail of this writer and sets the argument writer to nil.
	// The operation is zero-copy, similar to p = append(p, w.p).
	Append(w Writer) (err error)

	// Flush submits all malloc data; the caller must make sure the allocated bytes have been correctly assigned.
	// Its behavior is equivalent to an io.Writer that already has its parameter (slice b).
	Flush() (err error)

	// MallocLen returns the total length of the writable data that has not yet been submitted in the writer.
	MallocLen() (length int)
}
// unsafeStringToSlice reinterprets s as a []byte without copying.
// The result shares the string's memory and has len == cap == len(s),
// so writing through it would modify memory that backs a string.
func unsafeStringToSlice(s string) (b []byte) {
	src := (*reflect.StringHeader)(unsafe.Pointer(&s))
	dst := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	// Point the slice header at the string bytes, then publish capacity and length.
	dst.Data = src.Data
	dst.Cap = len(s)
	dst.Len = len(s)
	return b
}
// See the License for the specific language governing permissions and // limitations under the License. package netpoll import ( "bytes" "errors" "fmt" "sync" "sync/atomic" "github.com/bytedance/gopkg/lang/dirtmake" ) // BinaryInplaceThreshold marks the minimum value of the nocopy slice length, // which is the threshold to use copy to minimize overhead. const BinaryInplaceThreshold = block4k // LinkBufferCap that can be modified marks the minimum value of each node of LinkBuffer. var LinkBufferCap = block4k var untilErr = errors.New("link buffer read slice cannot find delim") var ( _ Reader = &LinkBuffer{} _ Writer = &LinkBuffer{} ) // NewLinkBuffer size defines the initial capacity, but there is no readable data. func NewLinkBuffer(size ...int) *LinkBuffer { buf := &LinkBuffer{} var l int if len(size) > 0 { l = size[0] } node := newLinkBufferNode(l) buf.head, buf.read, buf.flush, buf.write = node, node, node, node return buf } // UnsafeLinkBuffer implements ReadWriter. type UnsafeLinkBuffer struct { length int64 mallocSize int head *linkBufferNode // release head read *linkBufferNode // read head flush *linkBufferNode // malloc head write *linkBufferNode // malloc tail // buf allocated by Next when cross-package, which should be freed when release caches [][]byte // for `Peek` only, avoid creating too many []byte in `caches` // fix the issue when we have a large buffer and we call `Peek` multiple times cachePeek []byte } // Len implements Reader. func (b *UnsafeLinkBuffer) Len() int { l := atomic.LoadInt64(&b.length) return int(l) } // IsEmpty check if this LinkBuffer is empty. func (b *UnsafeLinkBuffer) IsEmpty() (ok bool) { return b.Len() == 0 } // ------------------------------------------ implement copy reader ------------------------------------------ // readCopy copies up to len(p) bytes from the buffer into p without exposing // the underlying buffer to user code (flagReadExposed is not set). 
// After copying, it releases consumed nodes where readExposed is false. // Nodes with readExposed are left for the next Release call. func (b *UnsafeLinkBuffer) readCopy(p []byte) (n int) { l := len(p) if l == 0 || b.Len() == 0 { return 0 } if has := b.Len(); has < l { l = has } b.recalLen(-l) // copy from nodes for ack := l; ack > 0; { if b.read.Len() == 0 { b.read = b.read.next continue } rd := b.read.Len() if rd >= ack { n += copy(p[n:], b.read.buf[b.read.off:b.read.off+ack]) b.read.off += ack break } n += copy(p[n:], b.read.buf[b.read.off:]) ack -= rd b.read = b.read.next } // advance read past empty nodes for b.read != b.flush && b.read.Len() == 0 { b.read = b.read.next } // release consumed nodes that are not readExposed. // exposed nodes stay in the chain so Release() can free them later. // // Example: [exposed/consumed] → [not-exposed/consumed] → [read/partial] // After: head → [exposed] → [read/partial] // the middle node is detached and released. var prev *linkBufferNode newHead := b.read for cur := b.head; cur != b.read; { next := cur.next if cur.readExposed() { if prev == nil { newHead = cur } prev = cur } else { cur.Release() if prev != nil { prev.next = next } } cur = next } b.head = newHead return n } // ------------------------------------------ implement zero-copy reader ------------------------------------------ // Next implements Reader. func (b *UnsafeLinkBuffer) Next(n int) (p []byte, err error) { if n <= 0 { return } // check whether enough or not. 
// Peek returns the next n readable bytes without consuming them.
//
// When the bytes live in a single node, the node is marked flagReadExposed and a
// slice of its buffer is returned directly. Otherwise the bytes are gathered into
// b.cachePeek, which is obtained through malloc and reused by later Peek calls:
// a repeated Peek only appends the bytes that were not collected before.
// b.cachePeek is freed here when its capacity is too small for n.
func (b *UnsafeLinkBuffer) Peek(n int) (p []byte, err error) {
	// n <= 0 yields (nil, nil)
	if n <= 0 {
		return
	}
	// check whether enough or not.
	if b.Len() < n {
		return p, fmt.Errorf("link buffer peek[%d] not enough", n)
	}
	// single node: zero-copy, so remember that the node memory was handed out
	if b.isSingleNode(n) {
		b.read.setFlag(flagReadExposed)
		return b.read.Peek(n), nil
	}

	// multiple nodes

	// try to make use of the cap of b.cachePeek, if can't, free it.
	if b.cachePeek != nil && cap(b.cachePeek) < n {
		free(b.cachePeek)
		b.cachePeek = nil
	}
	if b.cachePeek == nil {
		b.cachePeek = malloc(0, n) // init with zero len, will append later
	}
	p = b.cachePeek
	if len(p) >= n {
		// in case we peek smaller than last time,
		// we can return cache data directly.
		// recalLen truncates cachePeek whenever data is consumed, so the cached bytes are not stale
		return p[:n], nil
	}

	// How it works >>>>>>
	// [ -------- node0 -------- ][ --------- node1 --------- ]  <- b.read
	// [ --------------- p --------------- ]
	//                                     ^ len(p)     ^ n here
	//                           ^ scanned
	// `scanned` is the total length of the nodes already visited
	// `len(p) - scanned` is the position in the current node where copying resumes
	// `n - len(p)` is the number of bytes still to be appended to p
	// the copy length is capped by what the current node has left after that position
	for scanned, node := 0, b.read; len(p) < n; node = node.next {
		l := node.Len()
		if scanned+l <= len(p) { // already copied in p, skip
			scanned += l
			continue
		}
		start := len(p) - scanned // `start` must be smaller than l coz `scanned+l <= len(p)` is false
		copyn := n - len(p)
		if nodeLeftN := l - start; copyn > nodeLeftN {
			copyn = nodeLeftN
		}
		p = append(p, node.Peek(l)[start:start+copyn]...)
		scanned += l
	}
	// keep the (possibly longer) slice so the next Peek can continue from here
	b.cachePeek = p
	return p[:n], nil
}
func (b *UnsafeLinkBuffer) ReadString(n int) (s string, err error) { if n <= 0 { return } // check whether enough or not. if b.Len() < n { return s, fmt.Errorf("link buffer read string[%d] not enough", n) } return unsafeSliceToString(b.readBinary(n)), nil } // ReadBinary implements Reader. func (b *UnsafeLinkBuffer) ReadBinary(n int) (p []byte, err error) { if n <= 0 { return } // check whether enough or not. if b.Len() < n { return p, fmt.Errorf("link buffer read binary[%d] not enough", n) } return b.readBinary(n), nil } // readBinary cannot use mcache, because the memory allocated by readBinary will not be recycled. func (b *UnsafeLinkBuffer) readBinary(n int) (p []byte) { b.recalLen(-n) // re-cal length // single node if b.isSingleNode(n) { p = dirtmake.Bytes(n, n) copy(p, b.read.Next(n)) return p } p = dirtmake.Bytes(n, n) // multiple nodes var pIdx int var l int for ack := n; ack > 0; ack = ack - l { l = b.read.Len() if l >= ack { pIdx += copy(p[pIdx:], b.read.Next(ack)) break } else if l > 0 { pIdx += copy(p[pIdx:], b.read.Next(l)) } b.read = b.read.next } _ = pIdx return p } // ReadByte implements Reader. func (b *UnsafeLinkBuffer) ReadByte() (p byte, err error) { // check whether enough or not. if b.Len() < 1 { return p, errors.New("link buffer read byte is empty") } b.recalLen(-1) // re-cal length for { if b.read.Len() >= 1 { return b.read.Next(1)[0], nil } b.read = b.read.next } } // Until returns a slice ends with the delim in the buffer. func (b *UnsafeLinkBuffer) Until(delim byte) (line []byte, err error) { n := b.indexByte(delim, 0) if n < 0 { return nil, untilErr } return b.Next(n + 1) } // Slice returns a new LinkBuffer, which is a zero-copy slice of this LinkBuffer, // and only holds the ability of Reader. // // Slice will automatically execute a Release. func (b *UnsafeLinkBuffer) Slice(n int) (r Reader, err error) { if n <= 0 { return NewLinkBuffer(0), nil } // check whether enough or not. 
if b.Len() < n { return r, fmt.Errorf("link buffer readv[%d] not enough", n) } b.recalLen(-n) // re-cal length // just use for range p := new(LinkBuffer) p.length = int64(n) defer func() { // set to read-only p.flush = p.flush.next p.write = p.flush }() // single node if b.isSingleNode(n) { b.read.setFlag(flagReadExposed) node := b.read.Refer(n) p.head, p.read, p.flush = node, node, node return p, nil } // multiple nodes l := b.read.Len() b.read.setFlag(flagReadExposed) node := b.read.Refer(l) b.read = b.read.next p.head, p.read, p.flush = node, node, node for ack := n - l; ack > 0; ack = ack - l { l = b.read.Len() if l >= ack { b.read.setFlag(flagReadExposed) p.flush.next = b.read.Refer(ack) p.flush = p.flush.next break } else if l > 0 { b.read.setFlag(flagReadExposed) p.flush.next = b.read.Refer(l) p.flush = p.flush.next } b.read = b.read.next } return p, b.Release() } // ------------------------------------------ implement zero-copy writer ------------------------------------------ // Malloc pre-allocates memory, which is not readable, and becomes readable data after submission(e.g. Flush). func (b *UnsafeLinkBuffer) Malloc(n int) (buf []byte, err error) { if n <= 0 { return } b.mallocSize += n b.growth(n) return b.write.Malloc(n), nil } // MallocLen implements Writer. func (b *UnsafeLinkBuffer) MallocLen() (length int) { return b.mallocSize } // MallocAck will keep the first n malloc bytes and discard the rest. 
func (b *UnsafeLinkBuffer) MallocAck(n int) (err error) { if n < 0 { return fmt.Errorf("link buffer malloc ack[%d] invalid", n) } b.mallocSize = n b.write = b.flush var l int for ack := n; ack > 0; ack = ack - l { l = b.write.malloc - len(b.write.buf) if l >= ack { b.write.malloc = ack + len(b.write.buf) break } b.write = b.write.next } // discard the rest for node := b.write.next; node != nil; node = node.next { node.malloc, node.refer, node.buf = node.off, 1, node.buf[:node.off] } return nil } // Flush will submit all malloc data and must confirm that the allocated bytes have been correctly assigned. func (b *UnsafeLinkBuffer) Flush() (err error) { b.mallocSize = 0 // FIXME: The tail node must not be larger than 8KB to prevent Out Of Memory. if cap(b.write.buf) > pagesize { b.write.next = newLinkBufferNode(0) b.write = b.write.next } var n int for node := b.flush; node != b.write.next; node = node.next { delta := node.malloc - len(node.buf) if delta > 0 { n += delta node.buf = node.buf[:node.malloc] } } b.flush = b.write // re-cal length b.recalLen(n) return nil } // Append implements Writer. func (b *UnsafeLinkBuffer) Append(w Writer) (err error) { buf, ok := w.(*LinkBuffer) if !ok { return errors.New("unsupported writer which is not LinkBuffer") } return b.WriteBuffer(buf) } // WriteBuffer will not submit(e.g. Flush) data to ensure normal use of MallocLen. // you must actively submit before read the data. // The argument buf can't be used after calling WriteBuffer. (set it to nil) func (b *UnsafeLinkBuffer) WriteBuffer(buf *LinkBuffer) (err error) { if buf == nil { return } bufLen, bufMallocLen := buf.Len(), buf.MallocLen() if bufLen+bufMallocLen <= 0 { return nil } b.write.next = buf.read b.write = buf.write // close buf, prevents reuse. 
for buf.head != buf.read { nd := buf.head buf.head = buf.head.next nd.Release() } for buf.write = buf.write.next; buf.write != nil; { nd := buf.write buf.write = buf.write.next nd.Release() } buf.length, buf.mallocSize, buf.head, buf.read, buf.flush, buf.write = 0, 0, nil, nil, nil, nil // DON'T MODIFY THE CODE BELOW UNLESS YOU KNOW WHAT YOU ARE DOING ! // // You may encounter a chain of bugs and not be able to // find out within a week that they are caused by modifications here. // // After release buf, continue to adjust b. b.write.next = nil if bufLen > 0 { b.recalLen(bufLen) } b.mallocSize += bufMallocLen return nil } // WriteString implements Writer. func (b *UnsafeLinkBuffer) WriteString(s string) (n int, err error) { if len(s) == 0 { return } buf := unsafeStringToSlice(s) return b.WriteBinary(buf) } // WriteBinary implements Writer. func (b *UnsafeLinkBuffer) WriteBinary(p []byte) (n int, err error) { n = len(p) if n == 0 { return } b.mallocSize += n // TODO: Verify that all nocopy is possible under mcache. if n > BinaryInplaceThreshold { // expand buffer directly with nocopy b.write.next = newLinkBufferNode(0) b.write = b.write.next b.write.buf, b.write.malloc = p[:0], n return n, nil } // here will copy b.growth(n) buf := b.write.Malloc(n) return copy(buf, p), nil } // WriteDirect cannot be mixed with WriteString or WriteBinary functions. func (b *UnsafeLinkBuffer) WriteDirect(extra []byte, remainLen int) error { n := len(extra) if n == 0 || remainLen < 0 { return nil } // find origin origin := b.flush malloc := b.mallocSize - remainLen // calculate the remaining malloc length for t := origin.malloc - len(origin.buf); t < malloc; t = origin.malloc - len(origin.buf) { malloc -= t origin = origin.next } // Add the buf length of the original node // `malloc` is the origin buffer offset that already malloced, the extra buffer should be inserted after that offset. 
malloc += len(origin.buf) // Create dataNode and newNode and insert them into the chain // dataNode wrap the user buffer extra, and newNode wrap the origin left netpoll buffer // - originNode{buf=origin, off=0, malloc=malloc, readonly=true} : non-reusable // - dataNode{buf=extra, off=0, malloc=len(extra), readonly=true} : non-reusable // - newNode{buf=origin, off=malloc, malloc=origin.malloc, readonly=false} : reusable dataNode := newLinkBufferNode(0) // zero node will be set by readonly mode dataNode.buf, dataNode.malloc = extra[:0], n if remainLen > 0 { // split a single buffer node to originNode and newNode newNode := newLinkBufferNode(0) newNode.off = malloc newNode.buf = origin.buf[:malloc] newNode.malloc = origin.malloc newNode.unsetFlag(flagUnmanaged) origin.malloc = malloc origin.setFlag(flagUnmanaged) // link nodes dataNode.next = newNode newNode.next = origin.next origin.next = dataNode } else { // link nodes dataNode.next = origin.next origin.next = dataNode } // adjust b.write for b.write.next != nil { b.write = b.write.next } b.mallocSize += n return nil } // WriteByte implements Writer. func (b *UnsafeLinkBuffer) WriteByte(p byte) (err error) { dst, err := b.Malloc(1) if len(dst) == 1 { dst[0] = p } return err } // Close will recycle all buffer. func (b *UnsafeLinkBuffer) Close() (err error) { atomic.StoreInt64(&b.length, 0) b.mallocSize = 0 // just release all b.Release() for node := b.head; node != nil; { nd := node node = node.next nd.Release() } b.head, b.read, b.flush, b.write = nil, nil, nil, nil return nil } // ------------------------------------------ implement connection interface ------------------------------------------ // Bytes returns all the readable bytes of this LinkBuffer. 
func (b *UnsafeLinkBuffer) Bytes() []byte { node, flush := b.read, b.flush if node == flush { return node.buf[node.off:] } n := 0 p := dirtmake.Bytes(b.Len(), b.Len()) for ; node != flush; node = node.next { if node.Len() > 0 { n += copy(p[n:], node.buf[node.off:]) } } n += copy(p[n:], flush.buf[flush.off:]) return p[:n] } // GetBytes will read and fill the slice p as much as possible. // If p is not passed, return all readable bytes. func (b *UnsafeLinkBuffer) GetBytes(p [][]byte) (vs [][]byte) { node, flush := b.read, b.flush if len(p) == 0 { n := 0 for ; node != flush; node = node.next { n++ } node = b.read p = make([][]byte, n) } var i int for i = 0; node != flush && i < len(p); node = node.next { if node.Len() > 0 { node.setFlag(flagReadExposed) p[i] = node.buf[node.off:] i++ } } if i < len(p) { flush.setFlag(flagReadExposed) p[i] = flush.buf[flush.off:] i++ } return p[:i] } // book will grow and malloc buffer to hold data. // // bookSize: The size of data that can be read at once. // maxSize: The maximum size of data between two Release(). In some cases, this can // // guarantee all data allocated in one node to reduce copy. func (b *UnsafeLinkBuffer) book(bookSize, maxSize int) (p []byte) { l := cap(b.write.buf) - b.write.malloc // grow linkBuffer if l == 0 { l = maxSize b.write.next = newLinkBufferNode(maxSize) b.write = b.write.next } if l > bookSize { l = bookSize } return b.write.Malloc(l) } // bookAck will ack the first n malloc bytes and discard the rest. // // length: The size of data in inputBuffer. 
It is used to calculate the maxSize func (b *UnsafeLinkBuffer) bookAck(n int) (length int, err error) { b.write.malloc = n + len(b.write.buf) b.write.buf = b.write.buf[:b.write.malloc] b.flush = b.write // re-cal length length = b.recalLen(n) return length, nil } // calcMaxSize will calculate the data size between two Release() func (b *UnsafeLinkBuffer) calcMaxSize() (sum int) { for node := b.head; node != b.read; node = node.next { sum += len(node.buf) } sum += len(b.read.buf) return sum } // resetTail will reset tail node or add an empty tail node to // guarantee the tail node is not larger than 8KB func (b *UnsafeLinkBuffer) resetTail(maxSize int) { if maxSize <= pagesize { // no need to reset a small buffer tail node return } // set nil tail b.write.next = newLinkBufferNode(0) b.write = b.write.next b.flush = b.write } // indexByte returns the index of the first instance of c in buffer, or -1 if c is not present in buffer. func (b *UnsafeLinkBuffer) indexByte(c byte, skip int) int { size := b.Len() if skip >= size { return -1 } var unread, n, l int node := b.read for unread = size; unread > 0; unread -= n { l = node.Len() if l >= unread { // last node n = unread } else { // read full node n = l } // skip current node if skip >= n { skip -= n node = node.next continue } i := bytes.IndexByte(node.Peek(n)[skip:], c) if i >= 0 { return (size - unread) + skip + i // past_read + skip_read + index } skip = 0 // no skip bytes node = node.next } return -1 } // ------------------------------------------ private function ------------------------------------------ // recalLen re-calculate the length func (b *UnsafeLinkBuffer) recalLen(delta int) (length int) { if delta < 0 && len(b.cachePeek) > 0 { // b.cachePeek will contain stale data if we read out even a single byte from buffer, // so we need to reset it or the next Peek call will return invalid bytes. 
b.cachePeek = b.cachePeek[:0] } return int(atomic.AddInt64(&b.length, int64(delta))) } // growth directly create the next node, when b.write is not enough. func (b *UnsafeLinkBuffer) growth(n int) { if n <= 0 { return } // the memory of readonly node if not malloc by us so should skip them for b.write.getFlag(flagUnmanaged) || cap(b.write.buf)-b.write.malloc < n { if b.write.next == nil { b.write.next = newLinkBufferNode(n) b.write = b.write.next return } b.write = b.write.next } } // isSingleNode determines whether reading needs to cross nodes. // isSingleNode will move b.read to latest non-empty node if there is a zero-size node // Must require b.Len() > 0 func (b *UnsafeLinkBuffer) isSingleNode(readN int) (single bool) { if readN <= 0 { return true } l := b.read.Len() for l == 0 && b.read != b.flush { b.read = b.read.next l = b.read.Len() } return l >= readN } // memorySize return the real memory size in bytes the LinkBuffer occupied func (b *LinkBuffer) memorySize() (bytes int) { for node := b.head; node != nil; node = node.next { bytes += cap(node.buf) } for _, c := range b.caches { bytes += cap(c) } bytes += cap(b.cachePeek) return bytes } // ------------------------------------------ implement link node ------------------------------------------ // newLinkBufferNode create or reuse linkBufferNode. // Nodes with size <= 0 are marked as readonly, which means the node.buf is not allocated by this mcache. 
func newLinkBufferNode(size int) *linkBufferNode { node := linkedPool.Get().(*linkBufferNode) // reset node offset node.off, node.malloc, node.refer, node.mode = 0, 0, 1, defaultLinkBufferMode if size <= 0 { node.setFlag(flagUnmanaged) return node } if size < LinkBufferCap { size = LinkBufferCap } node.buf = malloc(0, size) return node } var linkedPool = sync.Pool{ New: func() interface{} { return &linkBufferNode{ refer: 1, // comes with 1 reference } }, } type linkBufferNode struct { buf []byte // buffer off int // read-offset malloc int // write-offset refer int32 // reference count mode uint8 // mode store all bool bit status origin *linkBufferNode // the root node of the extends next *linkBufferNode // the next node of the linked buffer } func (node *linkBufferNode) Len() (l int) { return len(node.buf) - node.off } func (node *linkBufferNode) IsEmpty() (ok bool) { return node.off == len(node.buf) } func (node *linkBufferNode) Reset() { if node.origin != nil || atomic.LoadInt32(&node.refer) != 1 { return } node.off, node.malloc = 0, 0 node.buf = node.buf[:0] } func (node *linkBufferNode) Next(n int) (p []byte) { off := node.off node.off += n return node.buf[off:node.off:node.off] } func (node *linkBufferNode) Peek(n int) (p []byte) { return node.buf[node.off : node.off+n : node.off+n] } func (node *linkBufferNode) Malloc(n int) (buf []byte) { malloc := node.malloc node.malloc += n return node.buf[malloc:node.malloc:node.malloc] } // Refer holds a reference count at the same time as Next, and releases the real buffer after Release. // The node obtained by Refer is read-only. func (node *linkBufferNode) Refer(n int) (p *linkBufferNode) { p = newLinkBufferNode(0) p.buf = node.Next(n) if node.origin != nil { p.origin = node.origin } else { p.origin = node } atomic.AddInt32(&p.origin.refer, 1) return p } // Release consists of two parts: // 1. reduce the reference count of itself and origin. // 2. recycle the buf when the reference count is 0. 
func (node *linkBufferNode) Release() (err error) { if node.origin != nil { node.origin.Release() } // release self if atomic.AddInt32(&node.refer, -1) == 0 { // readonly nodes cannot recycle node.buf, other node.buf are recycled to mcache. if node.reusable() { free(node.buf) } node.buf, node.origin, node.next = nil, nil, nil linkedPool.Put(node) } return nil } func (node *linkBufferNode) getFlag(flag uint8) bool { return node.mode&flag > 0 } func (node *linkBufferNode) setFlag(flag uint8) { node.mode |= flag } func (node *linkBufferNode) unsetFlag(flag uint8) { node.mode &^= flag } // reusable reports whether the node's buffer memory is owned by the LinkBuffer and can be recycled. // Called during Release to decide if node.buf should be returned to mcache via free. func (node *linkBufferNode) reusable() bool { return node.mode&flagUnmanaged == 0 } // readExposed reports whether the node's buffer has been returned directly to user code // via a zero-copy Reader method and may still be referenced externally. func (node *linkBufferNode) readExposed() bool { return node.mode&flagReadExposed > 0 } ================================================ FILE: nocopy_linkbuffer_norace.go ================================================ // Copyright 2024 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build !race
// +build !race

package netpoll

// LinkBuffer is UnsafeLinkBuffer in builds without the race detector.
type LinkBuffer = UnsafeLinkBuffer

================================================
FILE: nocopy_linkbuffer_race.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build race
// +build race

package netpoll

import (
	"sync"
)

// LinkBuffer is SafeLinkBuffer in builds with the race detector enabled.
type LinkBuffer = SafeLinkBuffer

// SafeLinkBuffer is only compiled with the race build tag (e.g. go test -race).
// It embeds UnsafeLinkBuffer and guards each wrapped method with the embedded
// mutex: every method below locks, delegates to the same-named
// UnsafeLinkBuffer method, and unlocks on return.
type SafeLinkBuffer struct {
	sync.Mutex
	UnsafeLinkBuffer
}

// ------------------------------------------ implement copy reader ------------------------------------------

// readCopy is the mutex-guarded form of UnsafeLinkBuffer.readCopy.
func (b *SafeLinkBuffer) readCopy(p []byte) int {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.readCopy(p)
}

// ------------------------------------------ implement zero-copy reader ------------------------------------------

// Next implements Reader.
func (b *SafeLinkBuffer) Next(n int) (p []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Next(n)
}

// Peek implements Reader.
func (b *SafeLinkBuffer) Peek(n int) (p []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Peek(n)
}

// Skip implements Reader.
func (b *SafeLinkBuffer) Skip(n int) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Skip(n)
}

// Until implements Reader.
func (b *SafeLinkBuffer) Until(delim byte) (line []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Until(delim)
}

// Release implements Reader.
func (b *SafeLinkBuffer) Release() (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Release()
}

// ReadString implements Reader.
func (b *SafeLinkBuffer) ReadString(n int) (s string, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.ReadString(n)
}

// ReadBinary implements Reader.
func (b *SafeLinkBuffer) ReadBinary(n int) (p []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.ReadBinary(n)
}

// ReadByte implements Reader.
func (b *SafeLinkBuffer) ReadByte() (p byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.ReadByte()
}

// Slice implements Reader.
func (b *SafeLinkBuffer) Slice(n int) (r Reader, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Slice(n)
}

// ------------------------------------------ implement zero-copy writer ------------------------------------------

// Malloc implements Writer.
func (b *SafeLinkBuffer) Malloc(n int) (buf []byte, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Malloc(n)
}

// MallocLen implements Writer.
func (b *SafeLinkBuffer) MallocLen() (length int) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.MallocLen()
}

// MallocAck implements Writer.
func (b *SafeLinkBuffer) MallocAck(n int) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.MallocAck(n)
}

// Flush implements Writer.
func (b *SafeLinkBuffer) Flush() (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Flush()
}

// Append implements Writer.
func (b *SafeLinkBuffer) Append(w Writer) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Append(w)
}

// WriteBuffer implements Writer.
// Only the receiver's mutex is taken here; buf's own mutex is not locked by this wrapper.
func (b *SafeLinkBuffer) WriteBuffer(buf *LinkBuffer) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteBuffer(buf)
}

// WriteString implements Writer.
func (b *SafeLinkBuffer) WriteString(s string) (n int, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteString(s)
}

// WriteBinary implements Writer.
func (b *SafeLinkBuffer) WriteBinary(p []byte) (n int, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteBinary(p)
}

// WriteDirect cannot be mixed with WriteString or WriteBinary functions.
func (b *SafeLinkBuffer) WriteDirect(p []byte, remainLen int) error {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteDirect(p, remainLen)
}

// WriteByte implements Writer.
func (b *SafeLinkBuffer) WriteByte(p byte) (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.WriteByte(p)
}

// Close will recycle all buffer.
func (b *SafeLinkBuffer) Close() (err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Close()
}

// ------------------------------------------ implement connection interface ------------------------------------------

// Bytes returns all the readable bytes of this SafeLinkBuffer.
func (b *SafeLinkBuffer) Bytes() []byte {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.Bytes()
}

// GetBytes will read and fill the slice p as much as possible.
func (b *SafeLinkBuffer) GetBytes(p [][]byte) (vs [][]byte) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.GetBytes(p)
}

// book will grow and malloc buffer to hold data.
//
// bookSize: The size of data that can be read at once.
// maxSize: The maximum size of data between two Release(). In some cases, this can
// guarantee all data allocated in one node to reduce copy.
func (b *SafeLinkBuffer) book(bookSize, maxSize int) (p []byte) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.book(bookSize, maxSize)
}

// bookAck will ack the first n malloc bytes and discard the rest.
//
// length: The size of data in inputBuffer. It is used to calculate the maxSize.
func (b *SafeLinkBuffer) bookAck(n int) (length int, err error) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.bookAck(n)
}

// calcMaxSize will calculate the data size between two Release()
func (b *SafeLinkBuffer) calcMaxSize() (sum int) {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.calcMaxSize()
}

// resetTail is the mutex-guarded form of UnsafeLinkBuffer.resetTail.
func (b *SafeLinkBuffer) resetTail(maxSize int) {
	b.Lock()
	defer b.Unlock()
	b.UnsafeLinkBuffer.resetTail(maxSize)
}

// indexByte is the mutex-guarded form of UnsafeLinkBuffer.indexByte.
func (b *SafeLinkBuffer) indexByte(c byte, skip int) int {
	b.Lock()
	defer b.Unlock()
	return b.UnsafeLinkBuffer.indexByte(c, skip)
}

================================================
FILE: nocopy_linkbuffer_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !windows
// +build !windows

package netpoll

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"reflect"
	"sync/atomic"
	"testing"
)

// TestLinkBuffer walks a single LinkBuffer through the basic read/write cycle
// (Malloc, Flush, Next, Peek, Release, book, MallocAck, GetBytes, Skip) and
// checks lengths and node pointers after each step.
func TestLinkBuffer(t *testing.T) {
	// clean & new
	LinkBufferCap = 128

	buf := NewLinkBuffer()
	Equal(t, buf.Len(), 0)
	MustTrue(t, buf.IsEmpty())
	head := buf.head

	// reading from an empty buffer must fail
	p, err := buf.Next(10)
	Equal(t, len(p), 0)
	MustTrue(t, err != nil)

	// malloc'd but not yet flushed bytes are not readable
	buf.Malloc(128)
	MustTrue(t, buf.IsEmpty())

	p, err = buf.Peek(10)
	Equal(t, len(p), 0)
	MustTrue(t, err != nil)

	buf.Flush()
	Equal(t, buf.Len(), 128)
	MustTrue(t, !buf.IsEmpty())

	p, err = buf.Next(28)
	Equal(t, len(p), 28)
	Equal(t, buf.Len(), 100)
	MustNil(t, err)
	MustTrue(t, buf.read.readExposed()) // single-node Next exposes buffer

	p, err = buf.Peek(90)
	Equal(t, len(p), 90)
	Equal(t, buf.Len(), 100)
	MustNil(t, err)
	MustTrue(t, buf.read.readExposed()) // single-node Peek exposes buffer

	// Release moves head up to the current read node
	read := buf.read
	Equal(t, buf.head, head)
	err = buf.Release()
	MustNil(t, err)
	Equal(t, buf.head, read)

	inputs := buf.book(block1k, block8k)
	Equal(t, len(inputs), block1k)
	Equal(t, buf.Len(), 100)

	buf.MallocAck(block1k)
	Equal(t, buf.Len(), 100)
	Equal(t, buf.MallocLen(), block1k)
	buf.Flush()
	Equal(t, buf.Len(), 100+block1k)
	Equal(t, buf.MallocLen(), 0)

	outputs := buf.GetBytes(make([][]byte, 16))
	Equal(t, len(outputs), 2)

	err = buf.Skip(block1k)
	MustNil(t, err)
	Equal(t, buf.Len(), 100)
}

// TestLinkBufferGetBytes writes ten chunks whose sizes grow by powers of ten
// and checks that GetBytes(nil) returns slices covering every written byte.
func TestLinkBufferGetBytes(t *testing.T) {
	buf := NewLinkBuffer()
	var (
		num         = 10
		b           = 1
		expectedLen = 0
	)
	for i := 0; i < num; i++ {
		expectedLen += b
		n, err := buf.WriteBinary(make([]byte, b))
		MustNil(t, err)
		Equal(t, n, b)
		b *= 10
	}
	buf.Flush()
	Equal(t, int(buf.length), expectedLen)

	bs := buf.GetBytes(nil)
	actualLen := 0
	for i := 0; i < len(bs); i++ {
		actualLen += len(bs[i])
	}
	Equal(t, actualLen, expectedLen)
}

// TestLinkBufferWithInvalid tests more cases where n is invalid (zero or negative).
func TestLinkBufferWithInvalid(t *testing.T) {
	// clean & new
	LinkBufferCap = 128

	buf := NewLinkBuffer()
	Equal(t, buf.Len(), 0)
	MustTrue(t, buf.IsEmpty())

	// n runs over 0, -1, -2, -3, -4
	for n := 0; n > -5; n-- {
		// test writer
		p, err := buf.Malloc(n)
		Equal(t, len(p), 0)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		var wn int
		wn, err = buf.WriteString("")
		Equal(t, wn, 0)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		wn, err = buf.WriteBinary(nil)
		Equal(t, wn, 0)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		err = buf.WriteDirect(nil, n)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		var w *LinkBuffer
		err = buf.Append(w)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		MustNil(t, err)

		// MallocAck is the one call expected to reject a negative n
		err = buf.MallocAck(n)
		Equal(t, buf.MallocLen(), 0)
		Equal(t, buf.Len(), 0)
		if n == 0 {
			MustNil(t, err)
		} else {
			MustTrue(t, err != nil)
		}

		err = buf.Flush()
		MustNil(t, err)

		// test reader
		p, err = buf.Next(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		p, err = buf.Peek(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		err = buf.Skip(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		var s string
		s, err = buf.ReadString(n)
		Equal(t, len(s), 0)
		MustNil(t, err)

		p, err = buf.ReadBinary(n)
		Equal(t, len(p), 0)
		MustNil(t, err)

		var r Reader
		r, err = buf.Slice(n)
		Equal(t, r.Len(), 0)
		MustNil(t, err)

		err = buf.Release()
		MustNil(t, err)
	}
}

// TestLinkBufferMultiNode uses LinkBufferCap = 8 so that writes spill across
// several nodes, then checks the read/flush/write node pointers and offsets
// after Malloc, Flush, Next, Peek, book, MallocAck and Skip.
func TestLinkBufferMultiNode(t *testing.T) {
	// clean & new
	LinkBufferCap = 8

	buf := NewLinkBuffer()
	Equal(t, buf.Len(), 0)
	MustTrue(t, buf.IsEmpty())

	var p []byte

	p, _ = buf.Malloc(15)
	for i := 0; i < len(p); i++ { // updates p[0] - p[14] to 0 - 14
		p[i] = byte(i)
	}
	Equal(t, len(p), 15)
	MustTrue(t, buf.read == buf.flush)
	Equal(t, buf.read.off, 0)
	Equal(t, buf.read.malloc, 0)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 15)
	Equal(t, cap(buf.write.buf), 16) // mcache up-aligned to the power of 2

	p, _ = buf.Malloc(7)
	for i := 0; i < len(p); i++ { // updates p[0] - p[6] to 15 - 21
		p[i] = byte(i + 15)
	}
	Equal(t, len(p), 7)
	MustTrue(t, buf.read == buf.flush)
	Equal(t, buf.read.off, 0)
	Equal(t, buf.read.malloc, 0)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 7)
	Equal(t, cap(buf.write.buf), LinkBufferCap)

	buf.Flush()
	MustTrue(t, buf.read != buf.flush)
	MustTrue(t, buf.flush == buf.write)
	Equal(t, buf.read.off, 0)
	Equal(t, len(buf.read.buf), 0)
	Equal(t, buf.read.next.off, 0)
	Equal(t, len(buf.read.next.buf), 15)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	Equal(t, len(buf.flush.buf), 7)

	p, _ = buf.Next(13)
	Equal(t, len(p), 13)
	Equal(t, p[0], byte(0))
	Equal(t, p[12], byte(12))
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 13)
	Equal(t, buf.read.Len(), 2)
	Equal(t, buf.read.next.Len(), 7)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	MustTrue(t, buf.read.readExposed())   // single-node Next
	MustTrue(t, !buf.flush.readExposed()) // not touched yet

	// Peek
	p, _ = buf.Peek(4)
	Equal(t, len(p), 4)
	Equal(t, p[0], byte(13))
	Equal(t, p[1], byte(14))
	Equal(t, p[2], byte(15))
	Equal(t, p[3], byte(16))
	Equal(t, len(buf.cachePeek), 4)
	p, _ = buf.Peek(3) // case: smaller than the last call
	Equal(t, len(p), 3)
	Equal(t, p[0], byte(13))
	Equal(t, p[2], byte(15))
	Equal(t, len(buf.cachePeek), 4)
	p, _ = buf.Peek(5) // case: Peek than the max call, and cap(buf.cachePeek) < n
	Equal(t, len(p), 5)
	Equal(t, p[0], byte(13))
	Equal(t, p[4], byte(17))
	Equal(t, len(buf.cachePeek), 5)
	p, _ = buf.Peek(6) // case: Peek than the last call, and cap(buf.cachePeek) > n
	Equal(t, len(p), 6)
	Equal(t, p[0], byte(13))
	Equal(t, p[5], byte(18))
	Equal(t, len(buf.cachePeek), 6)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 13)
	Equal(t, buf.read.Len(), 2)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	MustTrue(t, !buf.flush.readExposed()) // multi-node Peek copies, doesn't expose
	// Peek ends

	buf.book(block8k, block8k)
	MustTrue(t, buf.flush == buf.write)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 8)
	Equal(t, buf.flush.Len(), 7)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 8)
	Equal(t, buf.write.Len(), 7)

	buf.book(block8k, block8k)
	MustTrue(t, buf.flush != buf.write)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 8)
	Equal(t, buf.flush.Len(), 7)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 8192)
	Equal(t, buf.write.Len(), 0)

	buf.MallocAck(5)
	MustTrue(t, buf.flush != buf.write)
	Equal(t, buf.write.off, 0)
	Equal(t, buf.write.malloc, 4)
	Equal(t, buf.write.Len(), 0)
	MustTrue(t, buf.write.next == nil)
	buf.Flush()

	p, _ = buf.Next(8)
	Equal(t, len(p), 8)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 6)
	Equal(t, buf.read.Len(), 2)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 4)
	Equal(t, buf.flush.Len(), 4)

	err := buf.Skip(3)
	MustNil(t, err)
	MustTrue(t, buf.read == buf.flush)
	Equal(t, buf.read.off, 1)
	Equal(t, buf.read.Len(), 3)
	Equal(t, buf.flush.malloc, 4)
}

// TestLinkBufferRefer moves data between buffers with WriteBuffer and Slice
// and checks node pointers, offsets and the reference count before and after
// each side is released.
func TestLinkBufferRefer(t *testing.T) {
	// clean & new
	LinkBufferCap = 8

	wbuf := NewLinkBuffer()
	wbuf.book(block8k, block8k)
	wbuf.Malloc(7)
	wbuf.Flush()
	Equal(t, wbuf.Len(), block8k+7)

	buf := NewLinkBuffer()
	var p []byte

	// writev
	buf.WriteBuffer(wbuf)
	buf.Flush()
	Equal(t, buf.Len(), block8k+7)

	p, _ = buf.Next(5)
	Equal(t, len(p), 5)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 5)
	Equal(t, buf.read.Len(), block8k-5)
	Equal(t, buf.flush.off, 0)
	Equal(t, buf.flush.malloc, 7)
	Equal(t, cap(buf.flush.buf), 8)
	MustTrue(t, buf.read.readExposed()) // single-node Next

	// readv
	_rbuf, err := buf.Slice(4)
	rbuf, ok := _rbuf.(*LinkBuffer)
	MustNil(t, err)
	MustTrue(t, ok)
	Equal(t, rbuf.Len(), 4)
	MustTrue(t, rbuf.read != rbuf.flush)
	Equal(t, rbuf.read.off, 0)
	Equal(t, rbuf.read.Len(), 4)

	MustTrue(t, buf.head != buf.read) // Slice will Release
	MustTrue(t, rbuf.read != buf.read)
	Equal(t, buf.Len(), block8k-2)
	MustTrue(t, buf.read != buf.flush)
	Equal(t, buf.read.off, 9)
	Equal(t, buf.read.malloc, block8k)

	// release
	node1 := rbuf.head
	node2 := buf.head
	rbuf.Skip(rbuf.Len())
	err = rbuf.Release()
	MustNil(t, err)
	MustTrue(t, rbuf.head != node1)
	MustTrue(t, buf.head == node2)

	err = buf.Release()
	MustNil(t, err)
	MustTrue(t, buf.head != node2)
	MustTrue(t, buf.head == buf.read)
	Equal(t, buf.read.off, 9)
	Equal(t, buf.read.malloc, block8k)
	Equal(t, buf.read.refer, int32(1))
	Equal(t, buf.read.Len(), block8k-9)
}

// TestLinkBufferResetTail checks that a reader obtained from Slice still sees
// its original byte after resetTail and a further write on the source buffer.
func TestLinkBufferResetTail(t *testing.T) {
	except := byte(1)

	LinkBufferCap = 8
	buf := NewLinkBuffer()

	// 1. slice reader
	buf.WriteByte(except)
	buf.Flush()
	r1, _ := buf.Slice(1)
	t.Logf("1: %x\n", buf.flush.buf)
	// 2. release & reset tail
	buf.resetTail(LinkBufferCap)
	buf.WriteByte(byte(2))
	t.Logf("2: %x\n", buf.flush.buf)

	// check slice reader
	got, _ := r1.ReadByte()
	Equal(t, got, except)
}

// TestLinkBufferWriteBuffer appends two one-byte buffers to a third with
// WriteBuffer and checks that the bytes come out in append order.
func TestLinkBufferWriteBuffer(t *testing.T) {
	buf1 := NewLinkBuffer()
	buf2 := NewLinkBuffer()
	b2, _ := buf2.Malloc(1)
	b2[0] = 2
	buf2.Flush()
	buf3 := NewLinkBuffer()
	b3, _ := buf3.Malloc(1)
	b3[0] = 3
	buf3.Flush()
	buf1.WriteBuffer(buf2)
	buf1.WriteBuffer(buf3)
	buf1.Flush()
	MustTrue(t, bytes.Equal(buf1.Bytes(), []byte{2, 3}))
}

// TestLinkBufferCheckSingleNode exercises isSingleNode after a malloc larger
// than the initial node, both after and before the data has been flushed.
func TestLinkBufferCheckSingleNode(t *testing.T) {
	buf := NewLinkBuffer(block4k)
	_, err := buf.Malloc(block8k)
	MustNil(t, err)
	buf.Flush()
	MustTrue(t, buf.read.Len() == 0)
	is := buf.isSingleNode(block8k)
	MustTrue(t, is)
	MustTrue(t, buf.read.Len() == block8k)
	is = buf.isSingleNode(block8k + 1)
	MustTrue(t, !is)

	// cross node malloc, but b.read.Len() still == 0
	buf = NewLinkBuffer(block4k)
	_, err = buf.Malloc(block8k)
	MustNil(t, err)
	// not malloc ack yet
	// read function will call isSingleNode inside
	buf.isSingleNode(1)
}

// TestLinkBufferWriteMultiFlush checks that repeated Flush calls, with and
// without new data in between, leave the readable bytes consistent.
func TestLinkBufferWriteMultiFlush(t *testing.T) {
	buf := NewLinkBuffer()
	b1, _ := buf.Malloc(4)
	b1[0] = 1
	b1[2] = 2
	err := buf.Flush()
	MustNil(t, err)
	err = buf.Flush()
	MustNil(t, err)
	MustTrue(t, buf.Bytes()[0] == 1)
	MustTrue(t, len(buf.Bytes()) == 4)

	err = buf.Skip(2)
	MustNil(t, err)
	MustTrue(t, buf.Bytes()[0] == 2)
	MustTrue(t, len(buf.Bytes()) == 2)
	err = buf.Flush()
	MustNil(t, err)
	MustTrue(t, buf.Bytes()[0] == 2)
	MustTrue(t, len(buf.Bytes()) == 2)

	b2, _ := buf.Malloc(2)
	b2[0] = 3
	err = buf.Flush()
	MustNil(t, err)
	MustTrue(t, buf.Bytes()[0] == 2)
	MustTrue(t, buf.Bytes()[2] == 3)
	MustTrue(t, len(buf.Bytes()) == 4)
}

// TestLinkBufferWriteBinary checks that later writes never land in the spare
// capacity of a slice that was previously passed to WriteBinary.
func TestLinkBufferWriteBinary(t *testing.T) {
	// clean & new
	LinkBufferCap = 8

	// new b: cap=16, len=9
	b := make([]byte, 16)
	buf := NewLinkBuffer()
	buf.WriteBinary(b[:9])
	buf.Flush()

	// Currently, b[9:] should no longer be held.
	// WriteBinary/Malloc etc. cannot start from b[9:]
	buf.WriteBinary([]byte{1})
	Equal(t, b[9], byte(0))
	bs, err := buf.Malloc(1)
	MustNil(t, err)
	bs[0] = 2
	buf.Flush()
	Equal(t, b[9], byte(0))
}

// TestLinkBufferWriteDirect interleaves WriteDirect payloads into a single
// 32-byte Malloc region and checks the flushed byte order.
func TestLinkBufferWriteDirect(t *testing.T) {
	// clean & new
	LinkBufferCap = 32

	buf := NewLinkBuffer()
	bt, _ := buf.Malloc(32)
	bt[0] = 'a'
	bt[1] = 'b'
	buf.WriteDirect([]byte("cdef"), 30)
	bt[2] = 'g'
	buf.WriteDirect([]byte("hijkl"), 29)
	bt[3] = 'm'
	buf.WriteDirect([]byte("nopqrst"), 28)
	bt[4] = 'u'
	buf.WriteDirect([]byte("vwxyz"), 27)
	copy(bt[5:], "abcdefghijklmnopqrstuvwxyza")
	buf.WriteDirect([]byte("abcdefghijklmnopqrstuvwxyz"), 0)
	buf.Flush()
	bs := buf.Bytes()
	str := "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzaabcdefghijklmnopqrstuvwxyz"
	for i := 0; i < len(str); i++ {
		if bs[i] != str[i] {
			t.Error("not equal!")
		}
	}
}

// TestLinkBufferBufferMode checks the flags of freshly created nodes:
// size 0 gives an unmanaged node, a positive size gives a reusable one.
func TestLinkBufferBufferMode(t *testing.T) {
	bufnode := newLinkBufferNode(0)
	MustTrue(t, bufnode.getFlag(flagUnmanaged))
	MustTrue(t, !bufnode.reusable())
	MustTrue(t, !bufnode.readExposed())

	bufnode = newLinkBufferNode(1)
	MustTrue(t, !bufnode.getFlag(flagUnmanaged))
	MustTrue(t, bufnode.reusable())
	MustTrue(t, !bufnode.readExposed())
}

// TestLinkBufferReadCopy covers readCopy on single-node and multi-node data,
// short reads, and its handling of nodes already exposed through Peek.
func TestLinkBufferReadCopy(t *testing.T) {
	t.Run("SingleNode", func(t *testing.T) {
		LinkBufferCap = 128
		buf := NewLinkBuffer(128)
		p, _ := buf.Malloc(16)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()

		dst := make([]byte, 10)
		n := buf.readCopy(dst)
		Equal(t, n, 10)
		for i := 0; i < 10; i++ {
			Equal(t, dst[i], byte(i))
		}
		Equal(t, buf.Len(), 6)
		// readCopy must not set readExposed
		MustTrue(t, !buf.read.readExposed())
	})

	t.Run("MultiNode", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		p, _ := buf.Malloc(8)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()
		p, _ = buf.Malloc(8)
		for i := range p {
			p[i] = byte(i + 8)
		}
		buf.Flush()

		dst := make([]byte, 16)
		n := buf.readCopy(dst)
		Equal(t, n, 16)
		for i := 0; i < 16; i++ {
			Equal(t, dst[i], byte(i))
		}
		Equal(t, buf.Len(), 0)
	})

	t.Run("PartialRead", func(t *testing.T) {
		LinkBufferCap = 128
		buf := NewLinkBuffer(128)
		p, _ := buf.Malloc(4)
		for i := range p {
			p[i] = byte(i + 1)
		}
		buf.Flush()

		// read more than available
		dst := make([]byte, 16)
		n := buf.readCopy(dst)
		Equal(t, n, 4)
		Equal(t, dst[0], byte(1))
		Equal(t, dst[3], byte(4))
		Equal(t, buf.Len(), 0)
	})

	t.Run("ReleasesNonExposedNodes", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		buf.Malloc(8)
		buf.Flush()
		buf.Malloc(8)
		buf.Flush()

		node1 := buf.read
		dst := make([]byte, 16)
		buf.readCopy(dst)
		// node1 was not exposed, should be released (head advanced past it)
		MustTrue(t, buf.head != node1)
	})

	t.Run("SkipsExposedNodes", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		p, _ := buf.Malloc(8)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()
		buf.Malloc(8)
		buf.Flush()

		// expose node1 via Peek
		buf.Peek(4)
		node1 := buf.read
		MustTrue(t, node1.readExposed())

		// readCopy past both nodes
		dst := make([]byte, 16)
		n := buf.readCopy(dst)
		Equal(t, n, 16)
		Equal(t, dst[0], byte(0))
		// head should stay at exposed node1
		Equal(t, buf.head, node1)

		// subsequent Release frees the exposed node
		buf.Release()
		MustTrue(t, buf.head != node1)
	})

	// [exposed/consumed] → [not-exposed/consumed] → [partial-consumed/read]
	t.Run("ExposedThenNonExposedThenPartial", func(t *testing.T) {
		LinkBufferCap = 8
		buf := NewLinkBuffer(8)
		// node1: 8 bytes
		p, _ := buf.Malloc(8)
		for i := range p {
			p[i] = byte(i)
		}
		buf.Flush()
		// node2: 8 bytes
		p, _ = buf.Malloc(8)
		for i := range p {
			p[i] = byte(i + 8)
		}
		buf.Flush()
		// node3: 8 bytes
		p, _ = buf.Malloc(8)
		for i := range p {
			p[i] = byte(i + 16)
		}
		buf.Flush()

		// expose node1 via Peek
		buf.Peek(4)
		node1 := buf.read
		node2 := node1.next
		MustTrue(t, node1.readExposed())
		MustTrue(t, !node2.readExposed())

		// readCopy 20 bytes: consumes node1(8) + node2(8) + 4 from node3
		dst := make([]byte, 20)
		n := buf.readCopy(dst)
		Equal(t, n, 20)
		for i := 0; i < 20; i++ {
			Equal(t, dst[i], byte(i))
		}
		Equal(t, buf.Len(), 4)

		// head should be node1 (exposed, kept in chain)
		Equal(t, buf.head, node1)
		// node2 was released, node1.next should skip to read (node3)
		Equal(t, node1.next, buf.read)

		// subsequent Release frees the exposed node
		buf.Release()
		MustTrue(t, buf.head == buf.read)
	})
}

// BenchmarkLinkBufferConcurrentReadWrite runs one writer and one reader
// goroutine against the same buffer; the reader panics if the 7-byte frames it
// receives do not match the expected rotation of the writer's 10-byte pattern.
func BenchmarkLinkBufferConcurrentReadWrite(b *testing.B) {
	b.StopTimer()

	buf := NewLinkBuffer()
	var rwTag uint32
	readMsg := []string{
		"0123456",
		"7890123",
		"4567890",
		"1234567",
		"8901234",
		"5678901",
		"2345678",
		"9012345",
		"6789012",
		"3456789",
	}
	writeMsg := []byte("0123456789")

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	b.SetParallelism(2) // one read one write
	b.RunParallel(func(pb *testing.PB) {
		switch atomic.AddUint32(&rwTag, 1) {
		case 1:
			// 1 is write
			for pb.Next() {
				p, err := buf.Malloc(80)
				if err != nil {
					panic(fmt.Sprintf("malloc error %s", err.Error()))
				}
				for i := 0; i < 7; i++ {
					copy(p[i*10:i*10+10], writeMsg)
				}
				buf.MallocAck(70)
				buf.Flush()
			}
		case 2:
			// 2 is read
			for pb.Next() {
				for i := 0; i < 10; {
					p, err := buf.Next(7)
					if err == nil {
						if string(p) != readMsg[i] {
							panic(fmt.Sprintf("NEXT p[%s] != msg[%s]", p, readMsg[i]))
						}
					} else {
						// No read data, wait for write
						continue
					}
					i++
				}
				buf.Release()
			}
		}
	})
}

// TestUnsafeStringToSlice checks that the converted slice keeps the original
// bytes after the source string variable is reassigned.
func TestUnsafeStringToSlice(t *testing.T) {
	s := "hello world"
	bs := unsafeStringToSlice(s)
	s = "hi, boy"
	_ = s
	Equal(t, string(bs), "hello world")
}

// TestLinkBufferIndexByte has a goroutine append 1002-byte records with '\n'
// at offsets 500 and 1001 while the test goroutine locates them via indexByte.
func TestLinkBufferIndexByte(t *testing.T) {
	// clean & new
	LinkBufferCap = 128
	loopSize := 1000
	trigger := make(chan struct{}, 16)

	lb := NewLinkBuffer()
	empty := make([]byte, 1002)
	go func() {
		for i := 0; i < loopSize; i++ {
			buf, err := lb.Malloc(1002)
			// need clear buffer
			copy(buf, empty)
			buf[500] = '\n'
			buf[1001] = '\n'
			MustNil(t, err)
			lb.Flush()
			trigger <- struct{}{}
		}
	}()

	for i := 0; i < loopSize; i++ {
		<-trigger
		last := i * 1002
		n := lb.indexByte('\n', 0+last)
		Equal(t, n, 500+last)
		n = lb.indexByte('\n', 500+last)
		Equal(t, n, 500+last)
		n = lb.indexByte('\n', 501+last)
		Equal(t, n, 1001+last)
	}
}

// TestLinkBufferPeekOutOfMemory checks that repeated Peek calls, within one
// node and across nodes, do not keep growing the value reported by memorySize.
func TestLinkBufferPeekOutOfMemory(t *testing.T) {
	bufCap := 1024 * 8
	bufNodes := 100
	magicN := uint64(2024)
	buf := NewLinkBuffer(bufCap)
	MustTrue(t, buf.IsEmpty())
	Equal(t, cap(buf.write.buf), bufCap)
	Equal(t, buf.memorySize(), bufCap)

	var p []byte
	var err error
	// write data that cross multi nodes
	for n := 0; n < bufNodes; n++ {
		p, err = buf.Malloc(bufCap)
		MustNil(t, err)
		Equal(t, len(p), bufCap)
		binary.BigEndian.PutUint64(p, magicN)
	}
	Equal(t, buf.MallocLen(), bufCap*bufNodes)
	buf.Flush()
	Equal(t, buf.MallocLen(), 0)

	// peek data that in single node
	for i := 0; i < 10; i++ {
		p, err = buf.Peek(bufCap)
		Equal(t, binary.BigEndian.Uint64(p), magicN)
		MustNil(t, err)
		Equal(t, len(p), bufCap)
		Equal(t, buf.memorySize(), bufCap*bufNodes)
	}

	// peek data that cross nodes
	memorySize := 0
	for i := 0; i < 1024; i++ {
		p, err = buf.Peek(bufCap + 1)
		MustNil(t, err)
		Equal(t, binary.BigEndian.Uint64(p), magicN)
		Equal(t, len(p), bufCap+1)
		if memorySize == 0 {
			memorySize = buf.memorySize()
			t.Logf("after Peek: memorySize=%d", memorySize)
		} else {
			Equal(t, buf.memorySize(), memorySize)
		}
	}
}

// TestMallocAck mixes Malloc, WriteDirect and MallocAck and checks that the
// bytes read back are buf1, then the direct payload, then buf2.
func TestMallocAck(t *testing.T) {
	sLen := 1024 * 7
	buf1 := []byte{1, 2, 3, 4}
	buf2 := []byte{5, 6, 7, 8}
	lb := NewLinkBuffer(0)

	buf, err := lb.Malloc(4 + sLen)
	MustNil(t, err)
	copy(buf[:4], buf1)
	s := make([]byte, sLen)
	err = lb.WriteDirect(s, sLen)
	MustNil(t, err)

	err = lb.MallocAck(4 + sLen)
	MustNil(t, err)
	lb.Flush()

	buf, err = lb.Malloc(4)
	MustNil(t, err)
	copy(buf[:4], buf2)
	lb.Flush()

	buf, err = lb.Next(8 + sLen)
	MustNil(t, err)
	MustTrue(t, reflect.DeepEqual(buf, append(append(buf1, s...), buf2...)))
}

// BenchmarkStringToSliceByte measures unsafeStringToSlice.
func BenchmarkStringToSliceByte(b *testing.B) {
	b.StopTimer()
	s := "hello world"
	var bs []byte
	if false {
		b.Logf("bs = %s", bs)
	}

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		bs = unsafeStringToSlice(s)
	}
	_ = bs
}

// BenchmarkStringToCopy measures the copying []byte(s) conversion.
func BenchmarkStringToCopy(b *testing.B) {
	b.StopTimer()
	s := "hello world"
	var bs []byte
	b.Logf("bs = %s", bs)

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		bs = []byte(s)
	}
	_ = bs
}

// BenchmarkLinkBufferPoolGet measures a newLinkBufferNode(0)/Release round trip
// under parallel load.
func BenchmarkLinkBufferPoolGet(b *testing.B) {
	var v *linkBufferNode
	if false {
		b.Logf("bs = %v", v)
	}

	// benchmark
	b.ReportAllocs()
	b.SetParallelism(100)
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			v = newLinkBufferNode(0)
			v.Release()
		}
	})
}

// BenchmarkCopyString measures copying 1 KiB out of a 128 KiB source slice.
func BenchmarkCopyString(b *testing.B) {
	s := make([]byte, 128*1024)

	// benchmark
	b.ReportAllocs()
	b.SetParallelism(100)
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		v := make([]byte, 1024)
		for pb.Next() {
			copy(v, s)
		}
	})
}

// BenchmarkLinkBufferNoCopyRead writes one block sized as the sum of all read
// sizes, then reads it back with ReadBinary in sizes doubling from 32 bytes.
func BenchmarkLinkBufferNoCopyRead(b *testing.B) {
	totalSize := 0
	minSize := 32
	maxSize := minSize << 9
	for size := minSize; size <= maxSize; size = size << 1 {
		totalSize += size
	}
	b.ReportAllocs()
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		buffer := NewLinkBuffer(pagesize)
		for pb.Next() {
			buf, err := buffer.Malloc(totalSize)
			if len(buf) != totalSize || err != nil {
				b.Fatal(err)
			}
			err = buffer.MallocAck(totalSize)
			if err != nil {
				b.Fatal(err)
			}
			err = buffer.Flush()
			if err != nil {
				b.Fatal(err)
			}

			for size := minSize; size <= maxSize; size = size << 1 {
				buf, err = buffer.ReadBinary(size)
				if len(buf) != size || err != nil {
					b.Fatal(err)
				}
			}
			// buffer.Release will not reuse memory since we use no copy mode here
			err = buffer.Release()
			if err != nil {
				b.Fatal(err)
			}
		}
	})
}

================================================
FILE: nocopy_readwriter.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package netpoll import ( "fmt" "io" ) const maxReadCycle = 16 func newZCReader(r io.Reader) *zcReader { return &zcReader{ r: r, buf: NewLinkBuffer(), } } var _ Reader = &zcReader{} // zcReader implements Reader. type zcReader struct { r io.Reader buf *LinkBuffer } // Next implements Reader. func (r *zcReader) Next(n int) (p []byte, err error) { if err = r.waitRead(n); err != nil { return p, err } return r.buf.Next(n) } // Peek implements Reader. func (r *zcReader) Peek(n int) (buf []byte, err error) { if err = r.waitRead(n); err != nil { return buf, err } return r.buf.Peek(n) } // Skip implements Reader. func (r *zcReader) Skip(n int) (err error) { if err = r.waitRead(n); err != nil { return err } return r.buf.Skip(n) } // Release implements Reader. func (r *zcReader) Release() (err error) { return r.buf.Release() } // Slice implements Reader. func (r *zcReader) Slice(n int) (reader Reader, err error) { if err = r.waitRead(n); err != nil { return nil, err } return r.buf.Slice(n) } // Len implements Reader. func (r *zcReader) Len() (length int) { return r.buf.Len() } // ReadString implements Reader. func (r *zcReader) ReadString(n int) (s string, err error) { if err = r.waitRead(n); err != nil { return s, err } return r.buf.ReadString(n) } // ReadBinary implements Reader. func (r *zcReader) ReadBinary(n int) (p []byte, err error) { if err = r.waitRead(n); err != nil { return p, err } return r.buf.ReadBinary(n) } // ReadByte implements Reader. 
func (r *zcReader) ReadByte() (b byte, err error) { if err = r.waitRead(1); err != nil { return b, err } return r.buf.ReadByte() } func (r *zcReader) Until(delim byte) (line []byte, err error) { return r.buf.Until(delim) } func (r *zcReader) waitRead(n int) (err error) { for r.buf.Len() < n { err = r.fill(n) if err != nil { if err == io.EOF { err = Exception(ErrEOF, "") } return err } } return nil } // fill buffer to greater than n, range no more than 16 times. func (r *zcReader) fill(n int) (err error) { var buf []byte var num int for i := 0; i < maxReadCycle && r.buf.Len() < n && err == nil; i++ { buf, err = r.buf.Malloc(block4k) if err != nil { return err } num, err = r.r.Read(buf) if num < 0 { if err == nil { err = fmt.Errorf("zcReader fill negative count[%d]", num) } num = 0 } r.buf.MallocAck(num) r.buf.Flush() if err != nil { return err } } return err } func newZCWriter(w io.Writer) *zcWriter { return &zcWriter{ w: w, buf: NewLinkBuffer(), } } var _ Writer = &zcWriter{} // zcWriter implements Writer. type zcWriter struct { w io.Writer buf *LinkBuffer } // Malloc implements Writer. func (w *zcWriter) Malloc(n int) (buf []byte, err error) { return w.buf.Malloc(n) } // MallocLen implements Writer. func (w *zcWriter) MallocLen() (length int) { return w.buf.MallocLen() } // Flush implements Writer. func (w *zcWriter) Flush() (err error) { w.buf.Flush() n, err := w.w.Write(w.buf.Bytes()) if n > 0 { w.buf.Skip(n) w.buf.Release() } return err } // MallocAck implements Writer. func (w *zcWriter) MallocAck(n int) (err error) { return w.buf.MallocAck(n) } // Append implements Writer. func (w *zcWriter) Append(w2 Writer) (err error) { return w.buf.Append(w2) } // WriteString implements Writer. func (w *zcWriter) WriteString(s string) (n int, err error) { return w.buf.WriteString(s) } // WriteBinary implements Writer. func (w *zcWriter) WriteBinary(b []byte) (n int, err error) { return w.buf.WriteBinary(b) } // WriteDirect implements Writer. 
func (w *zcWriter) WriteDirect(p []byte, remainCap int) error { return w.buf.WriteDirect(p, remainCap) } // WriteByte implements Writer. func (w *zcWriter) WriteByte(b byte) (err error) { return w.buf.WriteByte(b) } // zcWriter implements ReadWriter. type zcReadWriter struct { *zcReader *zcWriter } func newIOReader(r Reader) *ioReader { return &ioReader{ r: r, } } var _ io.Reader = &ioReader{} // ioReader implements io.Reader. // // Deprecated: connection already implements Read directly with optimized buffer access. // This wrapper exists only for external Reader implementations. type ioReader struct { r Reader } // Read implements io.Reader. // // BUG: Read calls Release which invalidates any slices previously returned by Next or Peek // on the same Reader. Do not mix Next/Peek and Read on the same Reader without first // calling Release. func (r *ioReader) Read(p []byte) (n int, err error) { l := len(p) if l == 0 { return 0, nil } // read min(len(p), buffer.Len) if has := r.r.Len(); has < l { l = has } if l == 0 { return 0, io.EOF } src, err := r.r.Next(l) if err != nil { return 0, err } n = copy(p, src) err = r.r.Release() if err != nil { return 0, err } return n, nil } func newIOWriter(w Writer) *ioWriter { return &ioWriter{ w: w, } } var _ io.Writer = &ioWriter{} // ioWriter implements io.Writer. type ioWriter struct { w Writer } // Write implements io.Writer. func (w *ioWriter) Write(p []byte) (n int, err error) { dst, err := w.w.Malloc(len(p)) if err != nil { return 0, err } n = copy(dst, p) err = w.w.Flush() if err != nil { return 0, err } return n, nil } // ioReadWriter implements io.ReadWriter. type ioReadWriter struct { io.Reader io.Writer } ================================================ FILE: nocopy_readwriter_test.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"errors"
	"io"
	"io/ioutil"
	"testing"
)

// TestZCReader drives zcReader over a mock source whose Read always reports
// the whole slice as filled, and checks the buffered length after Next, Peek,
// Skip and Release.
func TestZCReader(t *testing.T) {
	reader := &MockIOReadWriter{
		read: func(p []byte) (n int, err error) {
			return len(p), nil
		},
	}
	r := newZCReader(reader)

	// Next consumes everything that was filled.
	p, err := r.Next(block8k)
	MustNil(t, err)
	Equal(t, len(p), block8k)
	Equal(t, r.buf.Len(), 0)

	// Peek leaves the data in the buffer.
	p, err = r.Peek(block4k)
	MustNil(t, err)
	Equal(t, len(p), block4k)
	Equal(t, r.buf.Len(), block4k)

	err = r.Skip(block4k)
	MustNil(t, err)
	Equal(t, r.buf.Len(), 0)

	err = r.Release()
	MustNil(t, err)
}

// TestZCWriter checks that malloc'ed bytes only become readable in the
// internal buffer after the buffer is flushed, and that zcWriter.Flush drains
// them into the mock sink.
func TestZCWriter(t *testing.T) {
	writer := &MockIOReadWriter{
		write: func(p []byte) (n int, err error) {
			return len(p), nil
		},
	}
	w := newZCWriter(writer)

	p, err := w.Malloc(block1k)
	MustNil(t, err)
	Equal(t, len(p), block1k)
	Equal(t, w.buf.Len(), 0)

	err = w.Flush()
	MustNil(t, err)
	Equal(t, w.buf.Len(), 0)

	p, err = w.Malloc(block2k)
	MustNil(t, err)
	Equal(t, len(p), block2k)
	Equal(t, w.buf.Len(), 0)

	// Flushing only the internal buffer publishes the bytes without writing them out.
	err = w.buf.Flush()
	MustNil(t, err)
	Equal(t, w.buf.Len(), block2k)

	err = w.Flush()
	MustNil(t, err)
	Equal(t, w.buf.Len(), 0)
}

// TestZCEOF checks that an io.EOF from the underlying reader surfaces as ErrEOF.
func TestZCEOF(t *testing.T) {
	reader := &MockIOReadWriter{
		read: func(p []byte) (n int, err error) {
			return 0, io.EOF
		},
	}
	r := newZCReader(reader)

	_, err := r.Next(block8k)
	MustTrue(t, errors.Is(err, ErrEOF))
}

// MockIOReadWriter is a test double whose Read and Write forward to the
// optional read and write callbacks.
type MockIOReadWriter struct {
	read  func(p []byte) (n int, err error)
	write func(p []byte) (n int, err error)
}

// Read calls the read callback if set; otherwise it returns (0, nil).
func (rw *MockIOReadWriter) Read(p []byte) (n int, err error) {
	if rw.read != nil {
		return rw.read(p)
	}
	return
}

// Write calls the write callback if set; otherwise it returns (0, nil).
func (rw *MockIOReadWriter) Write(p []byte) (n int, err error) {
	if rw.write != nil {
		return rw.write(p)
	}
	return
}

// TestIOReadWriter writes a message through ioWriter and reads it back
// through ioReader, both wrapping the same LinkBuffer.
func TestIOReadWriter(t *testing.T) {
	buf := NewLinkBuffer(block1k)
	reader, writer := newIOReader(buf), newIOWriter(buf)
	msg := []byte("hello world")
	n, err := writer.Write(msg)
	MustNil(t, err)
	Equal(t, n, len(msg))

	p := make([]byte, block1k)
	n, err = reader.Read(p)
	MustNil(t, err)
	Equal(t, n, len(msg))
}

// TestIOReadWriter2 is the same round trip, but drains the reader with
// ioutil.ReadAll, which relies on ioReader returning io.EOF once it is empty.
func TestIOReadWriter2(t *testing.T) {
	buf := NewLinkBuffer(block1k)
	reader, writer := newIOReader(buf), newIOWriter(buf)
	msg := []byte("hello world")
	n, err := writer.Write(msg)
	MustNil(t, err)
	Equal(t, n, len(msg))

	p, err := ioutil.ReadAll(reader)
	MustNil(t, err)
	Equal(t, len(p), len(msg))
}

================================================
FILE: poll.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

// Poll monitors fds (file descriptors), calls the FDOperator to perform specific actions,
// and shields underlying differences. On linux systems, poll uses epoll by default,
// and kevent by default on bsd systems.
type Poll interface {
	// Wait polls all registered fds and schedules processing based on the triggered events.
	// The call blocks, so it is typically used like:
	//
	//  go wait()
	//
	Wait() error

	// Close closes the poll and shuts down Wait().
	Close() error

	// Trigger can be used to actively refresh the loop where Wait is located when no event is triggered.
	// On linux systems, eventfd is used by default, and kevent by default on bsd systems.
	Trigger() error

	// Control changes how the operator's file descriptor is monitored; the operation is selected by event.
	Control(operator *FDOperator, event PollEvent) error

	// Alloc takes an operator from the cache.
	Alloc() (operator *FDOperator)

	// Free returns the operator to the cache.
	Free(operator *FDOperator)
}

// PollEvent defines the operation of poll.Control.
type PollEvent int

const (
	// PollReadable is used to monitor whether the FDOperator registered by
	// listener and connection is readable or closed.
	PollReadable PollEvent = 0x1

	// PollWritable is used to monitor whether the FDOperator created by the dialer is writable or closed.
	// ET mode must be used (still need to poll hup after being writable)
	PollWritable PollEvent = 0x2

	// PollDetach is used to remove the FDOperator from poll.
	PollDetach PollEvent = 0x3

	// PollR2RW is used to monitor writable for FDOperator,
	// which is only called when the socket write buffer is full.
	PollR2RW PollEvent = 0x5

	// PollRW2R is used to remove the writable monitor of FDOperator, generally used with PollR2RW.
	PollRW2R PollEvent = 0x6
)

================================================
FILE: poll_default.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build darwin || netbsd || freebsd || openbsd || dragonfly || linux // +build darwin netbsd freebsd openbsd dragonfly linux package netpoll func (p *defaultPoll) Alloc() (operator *FDOperator) { op := p.opcache.alloc() op.poll = p return op } func (p *defaultPoll) Free(operator *FDOperator) { p.opcache.freeable(operator) } func (p *defaultPoll) appendHup(operator *FDOperator) { p.hups = append(p.hups, operator.OnHup) p.detach(operator) operator.done() } func (p *defaultPoll) detach(operator *FDOperator) { if err := operator.Control(PollDetach); err != nil { logger.Printf("NETPOLL: poller detach operator failed: %v", err) } } func (p *defaultPoll) onhups() { if len(p.hups) == 0 { return } hups := p.hups p.hups = nil go func(onhups []func(p Poll) error) { for i := range onhups { if onhups[i] != nil { onhups[i](p) } } }(hups) } // readall read all left data before close connection func readall(op *FDOperator, br barrier) (total int, err error) { ivs := br.ivs var n int for { bs := op.Inputs(br.bs) if len(bs) == 0 { return total, nil } TryRead: n, err = ioread(op.FD, bs, ivs) op.InputAck(n) total += n if err != nil { return total, err } if n == 0 { goto TryRead } } } ================================================ FILE: poll_default_bsd.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build darwin || netbsd || freebsd || openbsd || dragonfly // +build darwin netbsd freebsd openbsd dragonfly package netpoll import ( "errors" "sync" "sync/atomic" "syscall" "unsafe" ) func openPoll() (Poll, error) { return openDefaultPoll() } func openDefaultPoll() (*defaultPoll, error) { l := new(defaultPoll) p, err := syscall.Kqueue() if err != nil { return nil, err } l.fd = p _, err = syscall.Kevent(l.fd, []syscall.Kevent_t{{ Ident: 0, Filter: syscall.EVFILT_USER, Flags: syscall.EV_ADD | syscall.EV_CLEAR, }}, nil, nil) if err != nil { syscall.Close(l.fd) return nil, err } l.opcache = newOperatorCache() return l, nil } type defaultPoll struct { fd int trigger uint32 m sync.Map //nolint:unused // only used in go:race opcache *operatorCache // operator cache hups []func(p Poll) error } // Wait implements Poll. func (p *defaultPoll) Wait() error { // init size, caps := 1024, barriercap events, barriers := make([]syscall.Kevent_t, size), make([]barrier, size) for i := range barriers { barriers[i].bs = make([][]byte, caps) barriers[i].ivs = make([]syscall.Iovec, caps) } // wait var triggerRead, triggerWrite, triggerHup bool for { n, err := syscall.Kevent(p.fd, nil, events, nil) if err != nil && err != syscall.EINTR { // exit gracefully if err == syscall.EBADF { return nil } return err } for i := 0; i < n; i++ { fd := int(events[i].Ident) // trigger if fd == 0 { // clean trigger atomic.StoreUint32(&p.trigger, 0) continue } operator := p.getOperator(fd, unsafe.Pointer(&events[i].Udata)) if operator == nil || !operator.do() { continue } var totalRead int evt := events[i] triggerRead = evt.Filter == syscall.EVFILT_READ && evt.Flags&syscall.EV_ENABLE != 0 triggerWrite = evt.Filter == syscall.EVFILT_WRITE && evt.Flags&syscall.EV_ENABLE != 0 triggerHup = evt.Flags&syscall.EV_EOF != 0 if triggerRead { if operator.OnRead != nil { // for non-connection operator.OnRead(p) } else { // only for connection bs := operator.Inputs(barriers[i].bs) if len(bs) > 0 { n, err := 
ioread(operator.FD, bs, barriers[i].ivs) operator.InputAck(n) totalRead += n if err != nil { p.appendHup(operator) continue } } } } if triggerHup { if triggerRead && operator.Inputs != nil { var leftRead int // read all left data if peer send and close if leftRead, err = readall(operator, barriers[i]); err != nil && !errors.Is(err, ErrEOF) { logger.Printf("NETPOLL: readall(fd=%d)=%d before close: %s", operator.FD, total, err.Error()) } totalRead += leftRead } // only close connection if no further read bytes if totalRead == 0 { p.appendHup(operator) continue } } if triggerWrite { if operator.OnWrite != nil { // for non-connection operator.OnWrite(p) } else { // only for connection bs, supportZeroCopy := operator.Outputs(barriers[i].bs) if len(bs) > 0 { // TODO: Let the upper layer pass in whether to use ZeroCopy. n, err := iosend(operator.FD, bs, barriers[i].ivs, false && supportZeroCopy) operator.OutputAck(n) if err != nil { p.appendHup(operator) continue } } } } operator.done() } // hup conns together to avoid blocking the poll. p.onhups() p.opcache.free() } } // TODO: Close will bad file descriptor here func (p *defaultPoll) Close() error { err := syscall.Close(p.fd) return err } // Trigger implements Poll. func (p *defaultPoll) Trigger() error { if atomic.AddUint32(&p.trigger, 1) > 1 { return nil } _, err := syscall.Kevent(p.fd, []syscall.Kevent_t{{ Ident: 0, Filter: syscall.EVFILT_USER, Fflags: syscall.NOTE_TRIGGER, }}, nil, nil) return err } // Control implements Poll. 
func (p *defaultPoll) Control(operator *FDOperator, event PollEvent) error { evs := make([]syscall.Kevent_t, 1) evs[0].Ident = uint64(operator.FD) p.setOperator(unsafe.Pointer(&evs[0].Udata), operator) switch event { case PollReadable: operator.inuse() evs[0].Filter, evs[0].Flags = syscall.EVFILT_READ, syscall.EV_ADD|syscall.EV_ENABLE case PollWritable: operator.inuse() evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_ADD|syscall.EV_ENABLE case PollDetach: if operator.OnWrite != nil { // means WaitWrite finished evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_DELETE } else { evs[0].Filter, evs[0].Flags = syscall.EVFILT_READ, syscall.EV_DELETE } p.delOperator(operator) case PollR2RW: evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_ADD|syscall.EV_ENABLE case PollRW2R: evs[0].Filter, evs[0].Flags = syscall.EVFILT_WRITE, syscall.EV_DELETE } _, err := syscall.Kevent(p.fd, evs, nil, nil) return err } ================================================ FILE: poll_default_bsd_norace.go ================================================ // Copyright 2023 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build (darwin || netbsd || freebsd || openbsd || dragonfly) && !race
// +build darwin netbsd freebsd openbsd dragonfly
// +build !race

package netpoll

import "unsafe"

// getOperator reinterprets the memory at ptr as a *FDOperator and returns it.
// fd is unused in this (non-race) build.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	return *(**FDOperator)(ptr)
}

// setOperator stores the operator pointer directly into the memory at ptr
// (callers pass the address of a kevent's Udata field).
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	*(**FDOperator)(ptr) = operator
}

// delOperator is a no-op in this build; there is no side table to clean up.
func (p *defaultPoll) delOperator(operator *FDOperator) {
}

================================================
FILE: poll_default_bsd_race.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build (darwin || netbsd || freebsd || openbsd || dragonfly) && race
// +build darwin netbsd freebsd openbsd dragonfly
// +build race

package netpoll

import "unsafe"

// getOperator looks the operator up by fd in the poll's sync.Map and returns
// nil when no entry exists. ptr is unused in this (race) build.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	tmp, _ := p.m.Load(fd)
	if tmp == nil {
		return nil
	}
	return tmp.(*FDOperator)
}

// setOperator records the operator in the sync.Map keyed by its FD.
// ptr is left untouched in this build.
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	p.m.Store(operator.FD, operator)
}

// delOperator removes the operator's FD entry from the sync.Map.
func (p *defaultPoll) delOperator(operator *FDOperator) {
	p.m.Delete(operator.FD)
}

================================================
FILE: poll_default_linux.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package netpoll import ( "errors" "runtime" "sync" "sync/atomic" "syscall" "unsafe" ) func openPoll() (Poll, error) { return openDefaultPoll() } func openDefaultPoll() (*defaultPoll, error) { poll := new(defaultPoll) poll.buf = make([]byte, 8) p, err := EpollCreate(0) if err != nil { return nil, err } poll.fd = p r0, _, e0 := syscall.Syscall(syscall.SYS_EVENTFD2, 0, 0, 0) if e0 != 0 { _ = syscall.Close(poll.fd) return nil, e0 } poll.Reset = poll.reset poll.Handler = poll.handler poll.wop = &FDOperator{FD: int(r0)} if err = poll.Control(poll.wop, PollReadable); err != nil { _ = syscall.Close(poll.wop.FD) _ = syscall.Close(poll.fd) return nil, err } poll.opcache = newOperatorCache() return poll, nil } type defaultPoll struct { pollArgs fd int // epoll fd wop *FDOperator // eventfd, wake epoll_wait buf []byte // read wfd trigger msg trigger uint32 // trigger flag m sync.Map //nolint:unused // only used in go:race opcache *operatorCache // operator cache // fns for handle events Reset func(size, caps int) Handler func(events []epollevent) (closed bool) } type pollArgs struct { size int caps int events []epollevent barriers []barrier hups []func(p Poll) error } func (a *pollArgs) reset(size, caps int) { a.size, a.caps = size, caps a.events, a.barriers = make([]epollevent, size), make([]barrier, size) for i := range a.barriers { a.barriers[i].bs = make([][]byte, a.caps) a.barriers[i].ivs = make([]syscall.Iovec, a.caps) } } // Wait implements Poll. 
func (p *defaultPoll) Wait() (err error) { // init caps, msec, n := barriercap, -1, 0 p.Reset(128, caps) // wait for { if n == p.size && p.size < 128*1024 { p.Reset(p.size<<1, caps) } n, err = EpollWait(p.fd, p.events, msec) if err != nil && err != syscall.EINTR { return err } if n <= 0 { msec = -1 runtime.Gosched() continue } msec = 0 if p.Handler(p.events[:n]) { return nil } // we can make sure that there is no op remaining if Handler finished p.opcache.free() } } func (p *defaultPoll) handler(events []epollevent) (closed bool) { var triggerRead, triggerWrite, triggerHup, triggerError bool var err error for i := range events { operator := p.getOperator(0, unsafe.Pointer(&events[i].data)) if operator == nil || !operator.do() { continue } var totalRead int evt := events[i].events triggerRead = evt&syscall.EPOLLIN != 0 triggerWrite = evt&syscall.EPOLLOUT != 0 triggerHup = evt&(syscall.EPOLLHUP|syscall.EPOLLRDHUP) != 0 triggerError = evt&syscall.EPOLLERR != 0 // trigger or exit gracefully if operator.FD == p.wop.FD { // must clean trigger first syscall.Read(p.wop.FD, p.buf) atomic.StoreUint32(&p.trigger, 0) // if closed & exit if p.buf[0] > 0 { syscall.Close(p.wop.FD) syscall.Close(p.fd) operator.done() return true } operator.done() continue } if triggerRead { if operator.OnRead != nil { // for non-connection operator.OnRead(p) } else if operator.Inputs != nil { // for connection bs := operator.Inputs(p.barriers[i].bs) if len(bs) > 0 { n, err := ioread(operator.FD, bs, p.barriers[i].ivs) operator.InputAck(n) totalRead += n if err != nil { p.appendHup(operator) continue } } } else { logger.Printf("NETPOLL: operator has critical problem! 
event=%d operator=%v", evt, operator) } } if triggerHup { if triggerRead && operator.Inputs != nil { // read all left data if peer send and close var leftRead int // read all left data if peer send and close if leftRead, err = readall(operator, p.barriers[i]); err != nil && !errors.Is(err, ErrEOF) { logger.Printf("NETPOLL: readall(fd=%d)=%d before close: %s", operator.FD, totalRead, err.Error()) } totalRead += leftRead } // only close connection if no further read bytes if totalRead == 0 { p.appendHup(operator) continue } } if triggerError { // Under block-zerocopy, the kernel may give an error callback, which is not a real error, just an EAGAIN. // So here we need to check this error, if it is EAGAIN then do nothing, otherwise still mark as hup. if _, _, _, _, err := syscall.Recvmsg(operator.FD, nil, nil, syscall.MSG_ERRQUEUE); err != syscall.EAGAIN { p.appendHup(operator) } else { operator.done() } continue } if triggerWrite { if operator.OnWrite != nil { // for non-connection operator.OnWrite(p) } else if operator.Outputs != nil { // for connection bs, _ := operator.Outputs(p.barriers[i].bs) if len(bs) > 0 { n, err := iosend(operator.FD, bs, p.barriers[i].ivs, false) operator.OutputAck(n) if err != nil { p.appendHup(operator) continue } } } else { logger.Printf("NETPOLL: operator has critical problem! event=%d operator=%v", evt, operator) } } operator.done() } // hup conns together to avoid blocking the poll. p.onhups() return false } // Close will write 10000000 func (p *defaultPoll) Close() error { _, err := syscall.Write(p.wop.FD, []byte{1, 0, 0, 0, 0, 0, 0, 0}) return err } // Trigger implements Poll. func (p *defaultPoll) Trigger() error { if atomic.AddUint32(&p.trigger, 1) > 1 { return nil } // MAX(eventfd) = 0xfffffffffffffffe _, err := syscall.Write(p.wop.FD, []byte{0, 0, 0, 0, 0, 0, 0, 1}) return err } // Control implements Poll. 
func (p *defaultPoll) Control(operator *FDOperator, event PollEvent) error {
	// DON'T move `fd=operator.FD` behind inuse() call, we can only access operator before op.inuse() for avoid race
	// G1:              G2:
	// op.inuse()       op.unused()
	// op.FD  -- T1     op.FD = 0  -- T2
	// T1 and T2 may happen together
	fd := operator.FD
	// op is the epoll_ctl operation and evt the event mask plus user data
	// passed to EpollCtl; both stay zero for an unknown PollEvent.
	var op int
	var evt epollevent
	// Attach the operator to the event's user data so handler can find it again.
	p.setOperator(unsafe.Pointer(&evt.data), operator)
	switch event {
	case PollReadable: // server accept a new connection and wait read
		operator.inuse()
		op, evt.events = syscall.EPOLL_CTL_ADD, syscall.EPOLLIN|syscall.EPOLLRDHUP|syscall.EPOLLERR
	case PollWritable: // client create a new connection and wait connect finished
		operator.inuse()
		op, evt.events = syscall.EPOLL_CTL_ADD, EPOLLET|syscall.EPOLLOUT|syscall.EPOLLRDHUP|syscall.EPOLLERR
	case PollDetach: // deregister
		p.delOperator(operator)
		op, evt.events = syscall.EPOLL_CTL_DEL, syscall.EPOLLIN|syscall.EPOLLOUT|syscall.EPOLLRDHUP|syscall.EPOLLERR
	case PollR2RW: // connection wait read/write
		op, evt.events = syscall.EPOLL_CTL_MOD, syscall.EPOLLIN|syscall.EPOLLOUT|syscall.EPOLLRDHUP|syscall.EPOLLERR
	case PollRW2R: // connection wait read
		op, evt.events = syscall.EPOLL_CTL_MOD, syscall.EPOLLIN|syscall.EPOLLRDHUP|syscall.EPOLLERR
	}
	return EpollCtl(p.fd, op, fd, &evt)
}

================================================
FILE: poll_default_linux_norace.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build linux && !race
// +build linux,!race

package netpoll

import "unsafe"

// getOperator reinterprets the memory at ptr as a *FDOperator and returns it.
// fd is unused in this (non-race) build.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	return *(**FDOperator)(ptr)
}

// setOperator stores the operator pointer directly into the memory at ptr
// (callers pass the address of an epollevent's data field).
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	*(**FDOperator)(ptr) = operator
}

// delOperator is a no-op in this build; there is no side table to clean up.
func (p *defaultPoll) delOperator(operator *FDOperator) {
}

================================================
FILE: poll_default_linux_race.go
================================================
// Copyright 2023 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build linux && race
// +build linux,race

package netpoll

import "unsafe"

// eventdata is the layout written into an epollevent's data field in race
// builds: the fd plus padding, instead of a raw operator pointer.
type eventdata struct {
	fd  int32
	pad int32
}

// getOperator reads the fd stored at ptr and looks the operator up in the
// poll's sync.Map, returning nil when no entry exists. The fd argument is
// unused; the fd embedded in the event data is used instead.
func (p *defaultPoll) getOperator(fd int, ptr unsafe.Pointer) *FDOperator {
	data := *(*eventdata)(ptr)
	tmp, _ := p.m.Load(int(data.fd))
	if tmp == nil {
		return nil
	}
	return tmp.(*FDOperator)
}

// setOperator writes the operator's FD into the memory at ptr and records the
// operator in the sync.Map keyed by that FD.
func (p *defaultPoll) setOperator(ptr unsafe.Pointer, operator *FDOperator) {
	*(*eventdata)(ptr) = eventdata{fd: int32(operator.FD)}
	p.m.Store(operator.FD, operator)
}

// delOperator removes the operator's FD entry from the sync.Map.
func (p *defaultPoll) delOperator(operator *FDOperator) {
	p.m.Delete(operator.FD)
}

================================================
FILE: poll_default_linux_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build linux // +build linux package netpoll import ( "context" "errors" "syscall" "testing" "golang.org/x/sys/unix" ) func TestEpollEvent(t *testing.T) { epollfd, err := EpollCreate(0) MustNil(t, err) defer syscall.Close(epollfd) rfd, wfd := GetSysFdPairs() defer syscall.Close(rfd) defer syscall.Close(wfd) send := []byte("hello") recv := make([]byte, 5) events := make([]epollevent, 128) eventdata1 := [8]byte{0, 0, 0, 0, 0, 0, 0, 1} eventdata2 := [8]byte{0, 0, 0, 0, 0, 0, 0, 2} eventdata3 := [8]byte{0, 0, 0, 0, 0, 0, 0, 3} event1 := &epollevent{ events: syscall.EPOLLIN, data: eventdata1, } event2 := &epollevent{ events: syscall.EPOLLIN, data: eventdata2, } event3 := &epollevent{ events: syscall.EPOLLIN | syscall.EPOLLOUT, data: eventdata3, } // EPOLL: add ,del and add err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event1) MustNil(t, err) err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event1) MustNil(t, err) err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event2) MustNil(t, err) _, err = syscall.Write(wfd, send) MustNil(t, err) n, err := epollWaitUntil(epollfd, events, -1) MustNil(t, err) Equal(t, n, 1) Equal(t, events[0].data, eventdata2) _, err = syscall.Read(rfd, recv) MustTrue(t, err == nil && string(recv) == string(send)) err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event2) MustNil(t, err) // EPOLL: add ,mod and mod err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event1) MustNil(t, err) err = EpollCtl(epollfd, unix.EPOLL_CTL_MOD, rfd, event2) MustNil(t, err) err = EpollCtl(epollfd, unix.EPOLL_CTL_MOD, rfd, event3) MustNil(t, err) _, err = 
syscall.Write(wfd, send) MustNil(t, err) n, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Equal(t, n, 1) Equal(t, events[0].data, eventdata3) _, err = syscall.Read(rfd, recv) MustTrue(t, err == nil && string(recv) == string(send)) Assert(t, events[0].events&syscall.EPOLLIN != 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event2) MustNil(t, err) } func TestEpollWait(t *testing.T) { epollfd, err := EpollCreate(0) MustNil(t, err) defer syscall.Close(epollfd) rfd, wfd := GetSysFdPairs() defer syscall.Close(wfd) send := []byte("hello") recv := make([]byte, 5) events := make([]epollevent, 128) eventdata := [8]byte{0, 0, 0, 0, 0, 0, 0, 1} // EPOLL: init state event := &epollevent{ events: syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR, data: eventdata, } err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event) MustNil(t, err) _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLIN == 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) // EPOLL: readable _, err = syscall.Write(wfd, send) MustNil(t, err) _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLIN != 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) _, err = syscall.Read(rfd, recv) MustTrue(t, err == nil && string(recv) == string(send)) // EPOLL: read finished _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLIN == 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) // EPOLL: close peer fd err = syscall.Close(wfd) MustNil(t, err) _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLIN != 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) Assert(t, events[0].events&syscall.EPOLLRDHUP != 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) // EPOLL: close current fd rfd2, wfd2 := GetSysFdPairs() defer syscall.Close(wfd2) 
err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd2, event) MustNil(t, err) err = syscall.Close(rfd2) MustNil(t, err) _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLIN != 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) Assert(t, events[0].events&syscall.EPOLLRDHUP != 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event) MustNil(t, err) } func TestEpollETClose(t *testing.T) { epollfd, err := EpollCreate(0) MustNil(t, err) defer syscall.Close(epollfd) rfd, wfd := GetSysFdPairs() events := make([]epollevent, 128) eventdata := [8]byte{0, 0, 0, 0, 0, 0, 0, 1} event := &epollevent{ events: EPOLLET | syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR, data: eventdata, } // EPOLL: init state err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event) MustNil(t, err) _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLIN == 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) Assert(t, events[0].events&syscall.EPOLLRDHUP == 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) // EPOLL: close current fd // nothing will happen err = syscall.Close(rfd) MustNil(t, err) n, err := epollWaitUntil(epollfd, events, 100) MustNil(t, err) Assert(t, n == 0, n) err = syscall.Close(wfd) MustNil(t, err) // EPOLL: close peer fd // EPOLLIN and EPOLLOUT rfd, wfd = GetSysFdPairs() err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event) MustNil(t, err) err = syscall.Close(wfd) MustNil(t, err) n, err = epollWaitUntil(epollfd, events, 100) MustNil(t, err) Assert(t, n == 1, n) Assert(t, events[0].events&syscall.EPOLLIN != 0) Assert(t, events[0].events&syscall.EPOLLOUT != 0) Assert(t, events[0].events&syscall.EPOLLRDHUP != 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) buf := make([]byte, 1024) ivs := make([]syscall.Iovec, 1) n, err = ioread(rfd, [][]byte{buf}, ivs) // EOF Assert(t, n == 0 && errors.Is(err, ErrEOF), 
n, err) } func TestEpollETDel(t *testing.T) { epollfd, err := EpollCreate(0) MustNil(t, err) defer syscall.Close(epollfd) rfd, wfd := GetSysFdPairs() send := []byte("hello") events := make([]epollevent, 128) eventdata := [8]byte{0, 0, 0, 0, 0, 0, 0, 1} event := &epollevent{ events: EPOLLET | syscall.EPOLLIN | syscall.EPOLLRDHUP | syscall.EPOLLERR, data: eventdata, } // EPOLL: del partly err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, rfd, event) MustNil(t, err) event.events = syscall.EPOLLIN | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, rfd, event) MustNil(t, err) _, err = syscall.Write(wfd, send) MustNil(t, err) _, err = epollWaitUntil(epollfd, events, 100) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLIN == 0) Assert(t, events[0].events&syscall.EPOLLRDHUP == 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) } func TestEpollConnectSameFD(t *testing.T) { addr := syscall.SockaddrInet4{ Port: 12345, Addr: [4]byte{127, 0, 0, 1}, } loop := newTestEventLoop("tcp", "127.0.0.1:12345", func(ctx context.Context, connection Connection) error { _, err := connection.Reader().Next(connection.Reader().Len()) return err }, ) defer loop.Shutdown(context.Background()) epollfd, err := EpollCreate(0) MustNil(t, err) defer syscall.Close(epollfd) events := make([]epollevent, 128) eventdata1 := [8]byte{0, 0, 0, 0, 0, 0, 0, 1} eventdata2 := [8]byte{0, 0, 0, 0, 0, 0, 0, 2} event1 := &epollevent{ events: EPOLLET | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR, data: eventdata1, } event2 := &epollevent{ events: EPOLLET | syscall.EPOLLOUT | syscall.EPOLLRDHUP | syscall.EPOLLERR, data: eventdata2, } eventin := &epollevent{ events: syscall.EPOLLIN | syscall.EPOLLRDHUP | syscall.EPOLLERR, data: eventdata1, } // connect non-block socket fd1, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) MustNil(t, err) t.Logf("create fd: %d", fd1) err = syscall.SetNonblock(fd1, true) MustNil(t, err) 
err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, fd1, event1) MustNil(t, err) err = syscall.Connect(fd1, &addr) t.Log(err) // EINPROGRESS _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLOUT != 0) Assert(t, events[0].events&syscall.EPOLLRDHUP == 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) // forget to del fd // err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, fd1, event1) // MustNil(t, err) err = syscall.Close(fd1) // close fd1 MustNil(t, err) // connect non-block socket with same fd fd2, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) MustNil(t, err) t.Logf("create fd: %d", fd2) err = syscall.SetNonblock(fd2, true) MustNil(t, err) err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, fd2, event2) MustNil(t, err) err = syscall.Connect(fd2, &addr) t.Log(err) // EINPROGRESS _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLOUT != 0) Assert(t, events[0].events&syscall.EPOLLRDHUP == 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) err = EpollCtl(epollfd, unix.EPOLL_CTL_DEL, fd2, event2) MustNil(t, err) err = syscall.Close(fd2) // close fd2 MustNil(t, err) Equal(t, events[0].data, eventdata2) // no event after close fd fd3, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_STREAM, syscall.IPPROTO_TCP) MustNil(t, err) t.Logf("create fd: %d", fd3) err = syscall.SetNonblock(fd3, true) MustNil(t, err) err = EpollCtl(epollfd, unix.EPOLL_CTL_ADD, fd3, event1) MustNil(t, err) err = syscall.Connect(fd3, &addr) t.Log(err) // EINPROGRESS _, err = epollWaitUntil(epollfd, events, -1) MustNil(t, err) Assert(t, events[0].events&syscall.EPOLLOUT != 0) Assert(t, events[0].events&syscall.EPOLLRDHUP == 0) Assert(t, events[0].events&syscall.EPOLLERR == 0) MustNil(t, err) err = EpollCtl(epollfd, unix.EPOLL_CTL_MOD, fd3, eventin) MustNil(t, err) err = syscall.Close(fd3) // close fd3 MustNil(t, err) n, err := epollWaitUntil(epollfd, events, 100) MustNil(t, err) 
Assert(t, n == 0) } func epollWaitUntil(epfd int, events []epollevent, msec int) (n int, err error) { WAIT: n, err = EpollWait(epfd, events, msec) if err == syscall.EINTR { goto WAIT } return n, err } ================================================ FILE: poll_loadbalance.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package netpoll import ( "sync/atomic" "github.com/bytedance/gopkg/lang/fastrand" ) // LoadBalance sets the load balancing method. type LoadBalance int const ( // RoundRobin requests that connections are distributed to a Poll // in a round-robin fashion. RoundRobin LoadBalance = iota // Random requests that connections are randomly distributed. 
Random ) // loadbalance sets the load balancing method for []*polls type loadbalance interface { LoadBalance() LoadBalance // Pick choose the most qualified Poll Pick() (poll Poll) Rebalance(polls []Poll) } func newLoadbalance(lb LoadBalance, polls []Poll) loadbalance { switch lb { case RoundRobin: return newRoundRobinLB(polls) case Random: return newRandomLB(polls) } return newRoundRobinLB(polls) } func newRandomLB(polls []Poll) loadbalance { return &randomLB{polls: polls, pollSize: len(polls)} } type randomLB struct { polls []Poll pollSize int } func (b *randomLB) LoadBalance() LoadBalance { return Random } func (b *randomLB) Pick() (poll Poll) { idx := fastrand.Intn(b.pollSize) return b.polls[idx] } func (b *randomLB) Rebalance(polls []Poll) { b.polls, b.pollSize = polls, len(polls) } func newRoundRobinLB(polls []Poll) loadbalance { return &roundRobinLB{polls: polls, pollSize: len(polls)} } type roundRobinLB struct { polls []Poll accepted uintptr // accept counter pollSize int } func (b *roundRobinLB) LoadBalance() LoadBalance { return RoundRobin } func (b *roundRobinLB) Pick() (poll Poll) { idx := int(atomic.AddUintptr(&b.accepted, 1)) % b.pollSize return b.polls[idx] } func (b *roundRobinLB) Rebalance(polls []Poll) { b.polls, b.pollSize = polls, len(polls) } ================================================ FILE: poll_manager.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
//go:build !windows // +build !windows package netpoll import ( "fmt" "runtime" "sync/atomic" ) const ( managerUninitialized = iota managerInitializing managerInitialized ) func newManager(numLoops int) *manager { m := new(manager) m.SetLoadBalance(RoundRobin) m.SetNumLoops(numLoops) return m } // LoadBalance is used to do load balancing among multiple pollers. // a single poller may not be optimal if the number of cores is large (40C+). type manager struct { numLoops int32 status int32 // 0: uninitialized, 1: initializing, 2: initialized balance loadbalance // load balancing method polls []Poll // all the polls } // SetNumLoops will return error when set numLoops < 1 func (m *manager) SetNumLoops(numLoops int) (err error) { if numLoops < 1 { return fmt.Errorf("set invalid numLoops[%d]", numLoops) } // note: set new numLoops first and then change the status atomic.StoreInt32(&m.numLoops, int32(numLoops)) atomic.StoreInt32(&m.status, managerUninitialized) return nil } // SetLoadBalance set load balance. func (m *manager) SetLoadBalance(lb LoadBalance) error { if m.balance != nil && m.balance.LoadBalance() == lb { return nil } m.balance = newLoadbalance(lb, m.polls) return nil } // Close release all resources. func (m *manager) Close() (err error) { for _, poll := range m.polls { err = poll.Close() } m.numLoops = 0 m.balance = nil m.polls = nil return err } // Run all pollers. 
func (m *manager) Run() (err error) { defer func() { if err != nil { _ = m.Close() } }() numLoops := int(atomic.LoadInt32(&m.numLoops)) if numLoops == len(m.polls) { return nil } polls := make([]Poll, numLoops) if numLoops < len(m.polls) { // shrink polls copy(polls, m.polls[:numLoops]) for idx := numLoops; idx < len(m.polls); idx++ { // close redundant polls if err = m.polls[idx].Close(); err != nil { logger.Printf("NETPOLL: poller close failed: %v\n", err) } } } else { // growth polls copy(polls, m.polls) for idx := len(m.polls); idx < numLoops; idx++ { var poll Poll poll, err = openPoll() if err != nil { return err } polls[idx] = poll go poll.Wait() } } m.polls = polls // LoadBalance must be set before calling Run, otherwise it will panic. m.balance.Rebalance(m.polls) return nil } // Reset pollers, this operation is very dangerous, please make sure to do this when calling ! func (m *manager) Reset() error { for _, poll := range m.polls { poll.Close() } m.polls = nil return m.Run() } // Pick will select the poller for use each time based on the LoadBalance. func (m *manager) Pick() Poll { START: // fast path if atomic.LoadInt32(&m.status) == managerInitialized { return m.balance.Pick() } // slow path // try to get initializing lock failed, wait others finished the init work, and try again if !atomic.CompareAndSwapInt32(&m.status, managerUninitialized, managerInitializing) { runtime.Gosched() goto START } // adjust polls // m.Run() will finish very quickly, so will not many goroutines block on Pick. 
_ = m.Run() //nolint:staticcheck // SA9003: empty branch if !atomic.CompareAndSwapInt32(&m.status, managerInitializing, managerInitialized) { // SetNumLoops called during m.Run() which cause CAS failed // The polls will be adjusted next Pick } return m.balance.Pick() } ================================================ FILE: poll_manager_test.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !windows // +build !windows package netpoll import ( "runtime" "sync" "testing" ) func TestPollManager(t *testing.T) { r, w := GetSysFdPairs() rconn, wconn := &connection{}, &connection{} err := rconn.init(&netFD{fd: r}, nil) MustNil(t, err) err = wconn.init(&netFD{fd: w}, nil) MustNil(t, err) msg := []byte("hello world") n, err := wconn.Write(msg) MustNil(t, err) Equal(t, n, len(msg)) p, err := rconn.Reader().Next(n) MustNil(t, err) Equal(t, string(p), string(msg)) err = wconn.Close() MustNil(t, err) for rconn.IsActive() || wconn.IsActive() { runtime.Gosched() } } func TestPollManagerReset(t *testing.T) { n := pollmanager.numLoops err := pollmanager.Reset() MustNil(t, err) Equal(t, len(pollmanager.polls), int(n)) } func TestPollManagerSetNumLoops(t *testing.T) { pm := newManager(1) startGs := runtime.NumGoroutine() poll := pm.Pick() newGs := runtime.NumGoroutine() Assert(t, poll != nil) t.Logf("old=%d, new=%d", startGs, newGs) // FIXME: it's unstable due to background goroutines created 
by other tests // Assert(t, newGs-startGs == 1) // change pollers oldGs := newGs err := pm.SetNumLoops(100) MustNil(t, err) newGs = runtime.NumGoroutine() t.Logf("old=%d, new=%d", oldGs, newGs) // Assert(t, newGs == oldGs) // trigger polls adjustment var wg sync.WaitGroup finish := make(chan struct{}) for i := 0; i < 32; i++ { wg.Add(1) go func() { poll := pm.Pick() Assert(t, poll != nil) Assert(t, len(pm.polls) == 100) wg.Done() <-finish // hold goroutines }() } wg.Wait() close(finish) } ================================================ FILE: poll_test.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !windows // +build !windows package netpoll import ( "runtime" "sync" "sync/atomic" "syscall" "testing" "time" ) // Trigger has been validated, but no usage for now. 
func TestPollTrigger(t *testing.T) {
	// Skipped unconditionally; nothing below increments trigger.
	t.Skip()
	var trigger int
	stop := make(chan error)
	p, err := openDefaultPoll()
	MustNil(t, err)

	go func() {
		stop <- p.Wait()
	}()

	time.Sleep(time.Millisecond)
	Equal(t, trigger, 0)
	p.Trigger()
	time.Sleep(time.Millisecond)
	Equal(t, trigger, 1)
	p.Trigger()
	time.Sleep(time.Millisecond)
	Equal(t, trigger, 2)

	p.Close()
	err = <-stop
	MustNil(t, err)
}

// TestPollMod registers both ends of a socket pair on one poll and checks,
// through atomic counters, which callbacks fire after each Control call.
func TestPollMod(t *testing.T) {
	var rn, wn, hn int32
	read := func(p Poll) error {
		atomic.AddInt32(&rn, 1)
		return nil
	}
	write := func(p Poll) error {
		atomic.AddInt32(&wn, 1)
		return nil
	}
	hup := func(p Poll) error {
		atomic.AddInt32(&hn, 1)
		return nil
	}
	stop := make(chan error)
	p, err := openDefaultPoll()
	MustNil(t, err)
	go func() {
		stop <- p.Wait()
	}()

	rfd, wfd := GetSysFdPairs()
	rop := &FDOperator{FD: rfd, OnRead: read, OnWrite: write, OnHup: hup, poll: p}
	wop := &FDOperator{FD: wfd, OnRead: read, OnWrite: write, OnHup: hup, poll: p}
	var r, w, h int32
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w == 0 && h == 0, r, w, h)
	err = p.Control(rop, PollReadable)
	MustNil(t, err)
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w == 0 && h == 0, r, w, h)

	err = p.Control(wop, PollWritable) // trigger one shot
	MustNil(t, err)
	for atomic.LoadInt32(&wn) == 0 {
		runtime.Gosched()
	}
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w >= 1 && h == 0, r, w, h)

	err = p.Control(rop, PollR2RW) // trigger write
	MustNil(t, err)
	for atomic.LoadInt32(&wn) <= 1 {
		runtime.Gosched()
	}
	r, w, h = atomic.LoadInt32(&rn), atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, r == 0 && w >= 2 && h == 0, r, w, h)

	// close wfd, then trigger hup rfd
	err = syscall.Close(wfd) // trigger hup
	MustNil(t, err)
	for atomic.LoadInt32(&hn) == 0 {
		runtime.Gosched()
	}
	w, h = atomic.LoadInt32(&wn), atomic.LoadInt32(&hn)
	Assert(t, w >= 2 && h >= 1, r, w, h)

	p.Close()
	err = <-stop
	MustNil(t, err)
}

// TestPollClose checks that Close makes a concurrently running Wait return.
func TestPollClose(t *testing.T) {
	p, err := openDefaultPoll()
	MustNil(t, err)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		p.Wait()
		wg.Done()
	}()
	p.Close()
	wg.Wait()
}

// BenchmarkPollMod measures repeated Control(PollR2RW) calls on one operator.
// NOTE(review): the errors from openDefaultPoll and Control are ignored here.
func BenchmarkPollMod(b *testing.B) {
	b.StopTimer()
	p, _ := openDefaultPoll()
	r, _ := GetSysFdPairs()
	operator := &FDOperator{FD: r}
	p.Control(operator, PollReadable)

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		p.Control(operator, PollR2RW)
	}
}

================================================
FILE: sys_epoll_linux.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !arm64 && !loong64
// +build !arm64,!loong64

package netpoll

import (
	"syscall"
	"unsafe"
)

// EPOLLET is the edge-triggered flag in a form usable in epollevent.events.
// NOTE(review): the negation presumably exists because syscall.EPOLLET is a
// negative constant on these targets — confirm per architecture.
const EPOLLET = -syscall.EPOLLET

// epollevent is the event record passed to EpollCtl and filled by EpollWait.
type epollevent struct {
	events uint32
	data   [8]byte // unaligned uintptr
}

// EpollCreate implements epoll_create1.
// A zero errno from the raw syscall is translated to a nil error.
func EpollCreate(flag int) (fd int, err error) {
	var r0 uintptr
	r0, _, err = syscall.RawSyscall(syscall.SYS_EPOLL_CREATE1, uintptr(flag), 0, 0)
	if err == syscall.Errno(0) {
		err = nil
	}
	return int(r0), err
}

// EpollCtl implements epoll_ctl.
// A zero errno from the raw syscall is translated to a nil error.
func EpollCtl(epfd, op, fd int, event *epollevent) (err error) {
	_, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_CTL, uintptr(epfd), uintptr(op), uintptr(fd), uintptr(unsafe.Pointer(event)), 0, 0)
	if err == syscall.Errno(0) {
		err = nil
	}
	return err
}

// EpollWait implements epoll_wait.
func EpollWait(epfd int, events []epollevent, msec int) (n int, err error) { var r0 uintptr _p0 := unsafe.Pointer(&events[0]) if msec == 0 { r0, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_WAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), 0, 0, 0) } else { r0, _, err = syscall.Syscall6(syscall.SYS_EPOLL_WAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), uintptr(msec), 0, 0) } if err == syscall.Errno(0) { err = nil } return int(r0), err } ================================================ FILE: sys_epoll_linux_arm64.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package netpoll import ( "syscall" "unsafe" ) const EPOLLET = syscall.EPOLLET type epollevent struct { events uint32 _ int32 data [8]byte // unaligned uintptr } // EpollCreate implements epoll_create1. func EpollCreate(flag int) (fd int, err error) { var r0 uintptr r0, _, err = syscall.RawSyscall(syscall.SYS_EPOLL_CREATE1, uintptr(flag), 0, 0) if err == syscall.Errno(0) { err = nil } return int(r0), err } // EpollCtl implements epoll_ctl. func EpollCtl(epfd int, op int, fd int, event *epollevent) (err error) { _, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_CTL, uintptr(epfd), uintptr(op), uintptr(fd), uintptr(unsafe.Pointer(event)), 0, 0) if err == syscall.Errno(0) { err = nil } return err } // EpollWait implements epoll_wait. 
func EpollWait(epfd int, events []epollevent, msec int) (n int, err error) { var r0 uintptr _p0 := unsafe.Pointer(&events[0]) if msec == 0 { r0, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), 0, 0, 0) } else { r0, _, err = syscall.Syscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), uintptr(msec), 0, 0) } if err == syscall.Errno(0) { err = nil } return int(r0), err } ================================================ FILE: sys_epoll_linux_loong64.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build linux && loong64 // +build linux,loong64 package netpoll import ( "syscall" "unsafe" ) const EPOLLET = syscall.EPOLLET type epollevent struct { events uint32 _ int32 data [8]byte // unaligned uintptr } // EpollCreate implements epoll_create1. func EpollCreate(flag int) (fd int, err error) { var r0 uintptr r0, _, err = syscall.RawSyscall(syscall.SYS_EPOLL_CREATE1, uintptr(flag), 0, 0) if err == syscall.Errno(0) { err = nil } return int(r0), err } // EpollCtl implements epoll_ctl. func EpollCtl(epfd int, op int, fd int, event *epollevent) (err error) { _, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_CTL, uintptr(epfd), uintptr(op), uintptr(fd), uintptr(unsafe.Pointer(event)), 0, 0) if err == syscall.Errno(0) { err = nil } return err } // EpollWait implements epoll_wait. 
func EpollWait(epfd int, events []epollevent, msec int) (n int, err error) { var r0 uintptr _p0 := unsafe.Pointer(&events[0]) if msec == 0 { r0, _, err = syscall.RawSyscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), 0, 0, 0) } else { r0, _, err = syscall.Syscall6(syscall.SYS_EPOLL_PWAIT, uintptr(epfd), uintptr(_p0), uintptr(len(events)), uintptr(msec), 0, 0) } if err == syscall.Errno(0) { err = nil } return int(r0), err } ================================================ FILE: sys_exec.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build !windows // +build !windows package netpoll import ( "math" "os" "syscall" "unsafe" ) // GetSysFdPairs creates and returns the fds of a pair of sockets. func GetSysFdPairs() (r, w int) { fds, _ := syscall.Socketpair(syscall.AF_UNIX, syscall.SOCK_STREAM, 0) return fds[0], fds[1] } // setTCPNoDelay set the TCP_NODELAY flag on socket func setTCPNoDelay(fd int, b bool) (err error) { return syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_NODELAY, boolint(b)) } // Wrapper around the socket system call that marks the returned file // descriptor as nonblocking and close-on-exec. func sysSocket(family, sotype, proto int) (int, error) { // See ../syscall/exec_unix.go for description of ForkLock. 
syscall.ForkLock.RLock() s, err := syscall.Socket(family, sotype, proto) if err == nil { syscall.CloseOnExec(s) } syscall.ForkLock.RUnlock() if err != nil { return -1, os.NewSyscallError("socket", err) } if err = syscall.SetNonblock(s, true); err != nil { syscall.Close(s) return -1, os.NewSyscallError("setnonblock", err) } return s, nil } const barriercap = 32 type barrier struct { bs [][]byte ivs []syscall.Iovec } // writev wraps the writev system call. func writev(fd int, bs [][]byte, ivs []syscall.Iovec) (n int, err error) { iovLen := iovecs(bs, ivs) if iovLen == 0 { return 0, nil } // syscall r, _, e := syscall.RawSyscall(syscall.SYS_WRITEV, uintptr(fd), uintptr(unsafe.Pointer(&ivs[0])), uintptr(iovLen)) resetIovecs(bs, ivs[:iovLen]) if e != 0 { return int(r), e } return int(r), nil } // readv wraps the readv system call. // return 0, nil means EOF. func readv(fd int, bs [][]byte, ivs []syscall.Iovec) (n int, err error) { iovLen := iovecs(bs, ivs) if iovLen == 0 { return 0, nil } // syscall r, _, e := syscall.RawSyscall(syscall.SYS_READV, uintptr(fd), uintptr(unsafe.Pointer(&ivs[0])), uintptr(iovLen)) resetIovecs(bs, ivs[:iovLen]) if e != 0 { return int(r), e } return int(r), nil } // TODO: read from sysconf(_SC_IOV_MAX)? The Linux default is // // 1024 and this seems conservative enough for now. Darwin's // UIO_MAXIOV also seems to be 1024. // // iovecs limit length to 2GB(2^31) func iovecs(bs [][]byte, ivs []syscall.Iovec) (iovLen int) { totalLen := 0 for i := 0; i < len(bs); i++ { chunk := bs[i] l := len(chunk) if l == 0 { continue } ivs[iovLen].Base = &chunk[0] totalLen += l if totalLen < math.MaxInt32 { ivs[iovLen].SetLen(l) iovLen++ } else { newLen := math.MaxInt32 - totalLen + l ivs[iovLen].SetLen(newLen) iovLen++ return iovLen } } return iovLen } func resetIovecs(bs [][]byte, ivs []syscall.Iovec) { for i := 0; i < len(bs); i++ { bs[i] = nil } for i := 0; i < len(ivs); i++ { ivs[i].Base = nil } } // Boolean to int. 
func boolint(b bool) int {
	if b {
		return 1
	}
	return 0
}

================================================
FILE: sys_exec_test.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !windows
// +build !windows

package netpoll

import (
	"math"
	"syscall"
	"testing"
)

// TestIovecs checks the 2GB cap applied by iovecs: chunks are passed through
// untouched below the cap, and the chunk crossing it is truncated.
func TestIovecs(t *testing.T) {
	var got int
	var bs [][]byte
	ivs := make([]syscall.Iovec, 4)

	// case 1
	bs = [][]byte{
		make([]byte, 10),
		make([]byte, 20),
		make([]byte, 30),
		make([]byte, 40),
	}
	got = iovecs(bs, ivs)
	Equal(t, got, 4)
	Equal(t, int(ivs[0].Len), 10)
	Equal(t, int(ivs[1].Len), 20)
	Equal(t, int(ivs[2].Len), 30)
	Equal(t, int(ivs[3].Len), 40)

	// case 2
	resetIovecs(bs, ivs)
	bs = [][]byte{
		make([]byte, math.MaxInt32+100),
		make([]byte, 20),
		make([]byte, 30),
		make([]byte, 40),
	}
	got = iovecs(bs, ivs)
	Equal(t, got, 1)
	Equal(t, int(ivs[0].Len), math.MaxInt32)
	Assert(t, ivs[1].Base == nil)
	Assert(t, ivs[2].Base == nil)
	Assert(t, ivs[3].Base == nil)

	// case 3
	resetIovecs(bs, ivs)
	bs = [][]byte{
		make([]byte, 10),
		make([]byte, 20),
		make([]byte, math.MaxInt32+100),
		make([]byte, 40),
	}
	got = iovecs(bs, ivs)
	Equal(t, got, 3)
	Equal(t, int(ivs[0].Len), 10)
	Equal(t, int(ivs[1].Len), 20)
	Equal(t, int(ivs[2].Len), math.MaxInt32-30)
	Assert(t, ivs[3].Base == nil)
}

func TestWritev(t *testing.T) {
	r, w := GetSysFdPairs()
	barrier := barrier{}
	barrier.bs = [][]byte{
		[]byte(""),            // len=0
		[]byte("first line"),  // len=10
		[]byte("second line"), // len=11
		[]byte("third line"),  // len=10
	}
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))
	wn, err := writev(w, barrier.bs, barrier.ivs)
	MustNil(t, err)
	Equal(t, wn, 31)
	p := make([]byte, 50)
	rn, err := syscall.Read(r, p)
	MustNil(t, err)
	Equal(t, rn, 31)
	t.Logf("READ %s", p[:rn])
}

func TestReadv(t *testing.T) {
	r, w := GetSysFdPairs()
	vs := [][]byte{
		[]byte("first line"),  // len=10
		[]byte("second line"), // len=11
		[]byte("third line"),  // len=10
	}
	w1, _ := syscall.Write(w, vs[0])
	w2, _ := syscall.Write(w, vs[1])
	w3, _ := syscall.Write(w, vs[2])
	Equal(t, w1+w2+w3, 31)

	barrier := barrier{
		bs: make([][]byte, 4),
	}
	res := [][]byte{
		make([]byte, 0),
		make([]byte, 10),
		make([]byte, 11),
		make([]byte, 10),
	}
	// readv clears barrier.bs, so res keeps its own references to the buffers.
	copy(barrier.bs, res)
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))
	rn, err := readv(r, barrier.bs, barrier.ivs)
	MustNil(t, err)
	Equal(t, rn, 31)
	for i, v := range res {
		t.Logf("READ [%d] %s", i, v)
	}
}

func TestSendmsg(t *testing.T) {
	r, w := GetSysFdPairs()
	barrier := barrier{}
	barrier.bs = [][]byte{
		[]byte(""),            // len=0
		[]byte("first line"),  // len=10
		[]byte("second line"), // len=11
		[]byte("third line"),  // len=10
	}
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))
	wn, err := sendmsg(w, barrier.bs, barrier.ivs, false)
	MustNil(t, err)
	Equal(t, wn, 31)
	p := make([]byte, 50)
	rn, err := syscall.Read(r, p)
	MustNil(t, err)
	Equal(t, rn, 31)
	t.Logf("READ %s", p[:rn])
}

func BenchmarkWrite(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5

	// drain the peer so writes never block on a full socket buffer
	go func() {
		buffer := make([]byte, 13)
		for {
			syscall.Read(r, buffer)
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		wmsg := make([]byte, len(message)*5)
		var n int
		for j := 0; j < size; j++ {
			n += copy(wmsg[n:], message)
		}
		syscall.Write(w, wmsg)
	}
}

func BenchmarkWritev(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	barrier := barrier{}
	barrier.bs = make([][]byte, size)
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))
	for i := range barrier.bs {
		barrier.bs[i] = make([]byte, len(message))
	}

	go func() {
		buffer := make([]byte, 13)
		for {
			syscall.Read(r, buffer)
		}
	}()

	// benchmark
	// NOTE(review): writev clears barrier.bs, so iterations after the first
	// appear to send nothing — confirm this is what should be measured.
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		writev(w, barrier.bs, barrier.ivs)
	}
}

func BenchmarkSendmsg(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	barrier := barrier{}
	barrier.bs = make([][]byte, size)
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))
	for i := range barrier.bs {
		barrier.bs[i] = make([]byte, len(message))
	}

	go func() {
		buffer := make([]byte, 13)
		for {
			syscall.Read(r, buffer)
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		sendmsg(w, barrier.bs, barrier.ivs, false)
	}
}

func BenchmarkRead(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	wmsg := make([]byte, size*len(message))
	var n int
	for j := 0; j < size; j++ {
		n += copy(wmsg[n:], message)
	}

	// keep the socket supplied with data for the reader under test
	go func() {
		for {
			syscall.Write(w, wmsg)
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		buffer := make([]byte, size*len(message))
		syscall.Read(r, buffer)
	}
}

func BenchmarkReadv(b *testing.B) {
	b.StopTimer()
	r, w := GetSysFdPairs()
	message := "hello, world!"
	size := 5
	barrier := barrier{}
	barrier.bs = make([][]byte, size)
	barrier.ivs = make([]syscall.Iovec, len(barrier.bs))
	for i := range barrier.bs {
		barrier.bs[i] = make([]byte, len(message))
	}

	go func() {
		for {
			writeAll(w, []byte(message))
		}
	}()

	// benchmark
	b.ReportAllocs()
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		readv(r, barrier.bs, barrier.ivs)
	}
}

================================================
FILE: sys_keepalive_darwin.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

import "syscall"

// SetKeepAlive sets the keepalive for the connection.
// It enables SO_KEEPALIVE on fd and then applies secs to two TCP-level
// options; the first error other than ENOPROTOOPT on the middle call is
// returned.
func SetKeepAlive(fd, secs int) error {
	if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_KEEPALIVE, 1); err != nil {
		return err
	}
	// NOTE(review): 0x101 is presumably darwin's TCP_KEEPINTVL, which package
	// syscall does not export — confirm against <netinet/tcp.h>.
	switch err := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, 0x101, secs); err {
	case nil, syscall.ENOPROTOOPT: // OS X 10.7 and earlier don't support this option
	default:
		return err
	}
	return syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPALIVE, secs)
}

================================================
FILE: sys_keepalive_openbsd.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package netpoll

// SetKeepAlive sets the keepalive for the connection.
// On this platform it is a no-op that always returns nil; fd and secs are
// ignored.
func SetKeepAlive(fd, secs int) error {
	// OpenBSD has no user-settable per-socket TCP keepalive options.
	return nil
}

================================================
FILE: sys_keepalive_unix.go
================================================
// Copyright 2022 CloudWeGo Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build netbsd || freebsd || dragonfly || linux // +build netbsd freebsd dragonfly linux package netpoll import "syscall" // just support ipv4 func SetKeepAlive(fd, secs int) error { // open keep-alive if err := syscall.SetsockoptInt(fd, syscall.SOL_SOCKET, syscall.SO_KEEPALIVE, 1); err != nil { return err } // tcp_keepalive_intvl if err := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, secs); err != nil { return err } // tcp_keepalive_probes // if err := syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPCNT, 1); err != nil { // return err // } // tcp_keepalive_time return syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, secs) } ================================================ FILE: sys_sendmsg_bsd.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. //go:build darwin || dragonfly || freebsd || netbsd || openbsd // +build darwin dragonfly freebsd netbsd openbsd package netpoll import ( "syscall" "unsafe" ) // sendmsg wraps the sendmsg system call. 
// Must len(iovs) >= len(vs) func sendmsg(fd int, bs [][]byte, ivs []syscall.Iovec, zerocopy bool) (n int, err error) { iovLen := iovecs(bs, ivs) if iovLen == 0 { return 0, nil } msghdr := syscall.Msghdr{ Iov: &ivs[0], Iovlen: int32(iovLen), } // flags = syscall.MSG_DONTWAIT r, _, e := syscall.RawSyscall(syscall.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msghdr)), uintptr(0)) resetIovecs(bs, ivs[:iovLen]) if e != 0 { return int(r), e } return int(r), nil } ================================================ FILE: sys_sendmsg_linux.go ================================================ // Copyright 2022 CloudWeGo Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package netpoll import ( "syscall" "unsafe" ) //func init() { // err := syscall.Setrlimit(8, &syscall.Rlimit{ // Cur: 0xffffffff, // Max: 0xffffffff, // }) // if err != nil { // panic(err) // } //} // sendmsg wraps the sendmsg system call. 
// Must len(iovs) >= len(vs) func sendmsg(fd int, bs [][]byte, ivs []syscall.Iovec, zerocopy bool) (n int, err error) { iovLen := iovecs(bs, ivs) if iovLen == 0 { return 0, nil } msghdr := syscall.Msghdr{ Iov: &ivs[0], Iovlen: uint64(iovLen), } r, _, e := syscall.RawSyscall(syscall.SYS_SENDMSG, uintptr(fd), uintptr(unsafe.Pointer(&msghdr)), 0) resetIovecs(bs, ivs[:iovLen]) if e != 0 { return int(r), e } return int(r), nil } ================================================ FILE: sys_sockopt_bsd.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // // This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”). // All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors. //go:build darwin || dragonfly || freebsd || netbsd || openbsd // +build darwin dragonfly freebsd netbsd openbsd package netpoll import ( "os" "runtime" "syscall" ) func setDefaultSockopts(s, family, sotype int, ipv6only bool) error { if runtime.GOOS == "dragonfly" && sotype != syscall.SOCK_RAW { // On DragonFly BSD, we adjust the ephemeral port // range because unlike other BSD systems its default // port range doesn't conform to IANA recommendation // as described in RFC 6056 and is pretty narrow. switch family { case syscall.AF_INET: syscall.SetsockoptInt(s, syscall.IPPROTO_IP, syscall.IP_PORTRANGE, syscall.IP_PORTRANGE_HIGH) case syscall.AF_INET6: syscall.SetsockoptInt(s, syscall.IPPROTO_IPV6, syscall.IPV6_PORTRANGE, syscall.IPV6_PORTRANGE_HIGH) } } // Allow broadcast. return os.NewSyscallError("setsockopt", syscall.SetsockoptInt(s, syscall.SOL_SOCKET, syscall.SO_BROADCAST, 1)) } ================================================ FILE: sys_sockopt_linux.go ================================================ // Copyright 2009 The Go Authors. All rights reserved. 
// Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // // This file may have been modified by CloudWeGo authors. (“CloudWeGo Modifications”). // All CloudWeGo Modifications are Copyright 2022 CloudWeGo authors. package netpoll import ( "os" "syscall" ) func setDefaultSockopts(s, family, sotype int, ipv6only bool) error { if family == syscall.AF_INET6 && sotype != syscall.SOCK_RAW { // Allow both IP versions even if the OS default // is otherwise. Note that some operating systems // never admit this option. syscall.SetsockoptInt(s, syscall.IPPROTO_IPV6, syscall.IPV6_V6ONLY, boolint(ipv6only)) } // Allow broadcast. return os.NewSyscallError("setsockopt", syscall.SetsockoptInt(s, syscall.SOL_SOCKET, syscall.SO_BROADCAST, 1)) } ================================================ FILE: test_conns.sh ================================================ #!/usr/bin/env bash ip="$1" port="$2" conns="$3" timeout="$4" for i in $(seq 1 $conns); do nc -v -w $timeout $ip $port < /dev/null & done wait