[
  {
    "path": ".github/CODEOWNERS",
    "content": "# Each line is a file pattern followed by one or more owners.\n# More on CODEOWNERS files: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners\n\n# Default owner\n* @hashicorp/team-ip-compliance @hashicorp/consul-core-reviewers @hashicorp/nomad-eng @hashicorp/raft-force\n\n# Add override rules below. Each line is a file/folder pattern followed by one or more owners.\n# Being an owner means those groups or individuals will be added as reviewers to PRs affecting\n# those areas of the code.\n# Examples:\n# /docs/  @docs-team\n# *.js    @js-team\n# *.go    @go-team\n"
  },
  {
    "path": ".github/dependabot.yml",
    "content": "# Copyright (c) HashiCorp, Inc.\n# SPDX-License-Identifier: MPL-2.0\n\nversion: 2\n\nupdates:\n  - package-ecosystem: \"github-actions\"\n    directory: \"/\"\n    schedule:\n      interval: \"weekly\"\n      day: \"sunday\"\n    commit-message:\n      prefix: \"[chore] : \"\n    groups:\n      actions:\n        patterns:\n          - \"*\"\n\n  - package-ecosystem: \"gomod\"\n    directories:\n      - \"/\"\n      - \"/fuzzy\"\n      - \"/raft-compat\"\n    schedule:\n      interval: \"weekly\"\n      day: \"sunday\"\n    commit-message:\n      prefix: \"[chore] : \"\n    groups:\n      go:\n        patterns:\n          - \"*\"\n        applies-to: \"version-updates\"\n      go-security:\n        patterns:\n          - \"*\"\n        applies-to: \"security-updates\"\n"
  },
  {
    "path": ".github/pull_request_template.md",
    "content": "<!-- heimdall_github_prtemplate:grc-pci_dss-2024-01-05 -->\n## Description\n\n<!-- Provide a summary of what the PR does and why it is being submitted. -->\n\n## Related Issue\n\n<!-- If this PR is linked to any issue, provide the issue number or description here. Any related JIRA tickets can also be added here. -->\n\n## How Has This Been Tested?\n\n<!-- Describe how the changes have been tested. Provide test instructions or details. -->\n"
  },
  {
    "path": ".github/stale.yml",
    "content": "# Copyright IBM Corp. 2013, 2025\n# SPDX-License-Identifier: MPL-2.0\n\n# Number of days of inactivity before an Issue becomes stale\ndaysUntilStale: 60\n\n# Number of days of inactivity before an Issue with the stale label is closed.\n# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.\ndaysUntilClose: 30\n\n# Issues with these labels will never be considered stale. Set to `[]` to disable\n# We don't close any issue that is an enhancement or confirmed bug, but issues\n# waiting for reproduction cases and questions tend to get outdated.\nexemptLabels:\n  - \"enhancement\"\n  - \"bug\"\n  - \"thinking\"\n  - \"docs\"\n\n# Label to use when marking as stale\nstaleLabel: \"waiting-reply\"\n\n# Comment to post when marking as stale. Set to `false` to disable\nmarkComment: |\n  Hey there,\n  We wanted to check in on this request since it has been inactive for at least 60 days.\n  Have you reviewed the latest [godocs](https://godoc.org/github.com/hashicorp/raft)? \n  If you think this is still an important issue in the latest version of [the Raft library](https://github.com/hashicorp/raft/compare/) or \n  [its documentation](https://github.com/hashicorp/raft/compare/) please feel free to let us know and we'll keep it open for investigation.\n  If there is still no activity on this request in 30 days, we will go ahead and close it.\n  Thank you!\n\n# Comment to post when removing the stale label. Set to `false` to disable\nunmarkComment: false\n\n# Comment to post when closing a stale Issue. Set to `false` to disable\ncloseComment: >\n  Hey there,\n  This issue has been automatically closed because there hasn't been any activity for a while. \n  If you are still experiencing problems, or still have questions, feel free to [open a new one](https://github.com/hashicorp/raft/issues/new) :+1:\n"
  },
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: ci\n\non:\n  pull_request:\n    branches: [\"main\"]\n  push:\n    branches: [\"main\"]\n    tags: [\"*\"]\n\npermissions:\n  contents: read\n\njobs:\n  go-fmt-and-vet:\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n      - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0\n        with:\n          go-version: 'stable'\n          cache: true\n      - name: go fmt\n        run: |\n          files=$(go fmt ./...)\n          if [ -n \"$files\" ]; then\n            echo \"The following file(s) do not conform to go fmt:\"\n            echo \"$files\"\n            exit 1\n          fi\n      - name: go vet\n        run: |\n          PACKAGE_NAMES=$(go list ./... | grep -v github.com/hashicorp/raft/fuzzy)\n          go vet $PACKAGE_NAMES\n      - name: golangci-lint\n        uses: golangci/golangci-lint-action@1e7e51e771db61008b38414a730f564565cf7c20 # v9.2.0\n\n  go-test:\n    needs: go-fmt-and-vet\n    strategy:\n      matrix:\n        go: ['stable', 'oldstable']\n        arch: ['x32', 'x64']\n    runs-on: ubuntu-22.04\n    env:\n      INTEG_TESTS: yes\n    steps:\n      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n      - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0\n        with:\n          go-version: ${{ matrix.go }}\n          architecture: ${{ matrix.arch }}\n          cache: true\n      # x86 specific build.\n      - if: matrix.arch == 'x32'\n        run: |\n          sudo apt-get update\n          sudo apt-get install gcc-multilib\n          go test --tags batchtest ./...\n      # x86-64 specific build.\n      - if: matrix.arch == 'x64'\n        run: go test -race --tags batchtest ./...\n  go-test-compat:\n    needs: go-test\n    strategy:\n      matrix:\n        go: ['stable', 'oldstable']\n        arch: ['x32', 'x64']\n    runs-on: ubuntu-22.04\n    steps:\n      - uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2\n      - uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0\n        with:\n          go-version: ${{ matrix.go }}\n          architecture: ${{ matrix.arch }}\n          cache: true\n          submodules: true\n      # x86 specific build.\n      - if: matrix.arch == 'x32'\n        run: |\n          sudo apt-get update\n          sudo apt-get install gcc-multilib\n          git submodule update --init --recursive\n          cd raft-compat\n          go mod tidy\n          go test -v -coverpkg=./... ./... -coverprofile=\"${{ github.workspace }}/coverage.out\"\n      # x86-64 specific build.\n      - if: matrix.arch == 'x64'\n        run: |\n          git submodule update --init --recursive\n          cd raft-compat\n          go mod tidy\n          go test -race -v -coverpkg=./... ./... -coverprofile=\"${{ github.workspace }}/coverage.out\"\n      - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0\n        with:\n          path: \"${{ github.workspace }}/coverage.out\"\n          name: coverage-report-${{matrix.go}}-${{matrix.arch}}\n"
  },
  {
    "path": ".github/workflows/two-step-pr-approval.yml",
    "content": "name: Two-Stage PR Review Process\n\non:\n  pull_request:\n    types: [opened, synchronize, reopened, labeled, unlabeled, ready_for_review, converted_to_draft]\n  pull_request_review:\n    types: [submitted]\n\njobs:\n  manage-pr-status:\n    runs-on: ubuntu-latest\n    permissions:\n      pull-requests: write\n      contents: write\n    steps:\n      - name: Checkout code\n        uses: actions/checkout@v6.0.2\n      \n      - name: Two stage PR review\n        uses: hashicorp/two-stage-pr-approval@v0.1.0\n"
  },
  {
    "path": ".gitignore",
    "content": "# Compiled Object files, Static and Dynamic libs (Shared Objects)\n*.o\n*.a\n*.so\n\n# Folders\n_obj\n_test\n\n# Architecture specific extensions/prefixes\n*.[568vq]\n[568vq].out\n\n*.cgo1.go\n*.cgo2.c\n_cgo_defun.c\n_cgo_gotypes.go\n_cgo_export.*\n\n_testmain.go\n\n*.exe\n*.test\n\n# Goland IDE\n.idea\n"
  },
  {
    "path": ".gitmodules",
    "content": "[submodule \"raft-compat/raft-latest\"]\n\tpath = raft-compat/raft-previous-version\n\turl = https://github.com/hashicorp/raft.git\n"
  },
  {
    "path": ".golangci-lint.yml",
    "content": "# Copyright IBM Corp. 2013, 2025\n# SPDX-License-Identifier: MPL-2.0\n\nrun:\n  deadline: 5m\n\nlinters-settings:\n  govet:\n    check-shadowing: true\n  golint:\n    min-confidence: 0\n  depguard:\n    rules:\n      main:\n        list-mode: lax\n        allow:\n          - \"github.com/hashicorp/go-metrics/compat\"\n        deny:\n          - pkg: \"github.com/hashicorp/go-metrics\"\n            desc: not allowed, use github.com/hashicorp/go-metrics/compat instead\n          - pkg: \"github.com/armon/go-metrics\"\n            desc: not allowed, use github.com/hashicorp/go-metrics/compat instead\n\nlinters:\n  disable-all: true\n  enable:\n    - gofmt\n    #- golint\n    - govet\n    - depguard\n    #- varcheck\n    #- typecheck\n    #- gosimple\n\nissues:\n  exclude-use-default: false\n  exclude:\n      # ignore the false positive errors resulting from not including a comment above every `package` keyword\n    - should have a package comment, unless it's in another file for this package (golint)\n      # golint: Annoying issue about not having a comment. The rare codebase has such comments\n      # - (comment on exported (method|function|type|const)|should have( a package)? comment|comment should be of the form)\n      # errcheck: Almost all programs ignore errors on these functions and in most cases it's ok\n    - Error return value of .((os\\.)?std(out|err)\\..*|.*Close|.*Flush|os\\.Remove(All)?|.*printf?|os\\.(Un)?Setenv). is not checked\n\n      # golint: False positive when tests are defined in package 'test'\n    - func name will be used as test\\.Test.* by other packages, and that stutters; consider calling this\n\n      # staticcheck: Developers tend to write in C-style with an \n      # explicit 'break' in a 'switch', so it's ok to ignore\n    - ineffective break statement. 
Did you mean to break out of the outer loop\n      # gosec: Too many false-positives on 'unsafe' usage\n    - Use of unsafe calls should be audited\n\n    # gosec: Too many false-positives for parametrized shell calls\n    - Subprocess launch(ed with variable|ing should be audited)\n\n    # gosec: Duplicated errcheck checks\n    - G104\n\n    # gosec: Too many issues in popular repos\n    - (Expect directory permissions to be 0750 or less|Expect file permissions to be 0600 or less)\n\n    # gosec: False positive is triggered by 'src, err := ioutil.ReadFile(filename)'\n    - Potential file inclusion via variable\n"
  },
  {
    "path": ".travis.yml",
    "content": "# Copyright IBM Corp. 2013, 2025\n# SPDX-License-Identifier: MPL-2.0\n\nlanguage: go\n\ngo:\n    # Disabled until https://github.com/armon/go-metrics/issues/59 is fixed\n    # - 1.6\n    - 1.8\n    - 1.9\n    - 1.12\n    - tip\n\ninstall: \n    - make deps \n    - curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(go env GOPATH)/bin latest \n\nscript:\n    - make integ\n\nnotifications:\n    flowdock:\n        secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc=\n\n"
  },
  {
    "path": "CHANGELOG.md",
    "content": "# UNRELEASED\n\nIMPROVEMENTS\n\n* Added a flag to skip legacy duplicate telemetry. [GH-630](https://github.com/hashicorp/raft/pull/630)\n\n# 1.7.0 (June 5th, 2024)\n\nCHANGES\n\n* Raft multi version testing [GH-559](https://github.com/hashicorp/raft/pull/559)\n\nIMPROVEMENTS\n\n* Raft pre-vote extension implementation, activated by default. [GH-530](https://github.com/hashicorp/raft/pull/530)\n\nBUG FIXES\n\n* Fix serialize NetworkTransport data race on ServerAddr(). [GH-591](https://github.com/hashicorp/raft/pull/591)\n\n# 1.6.1 (January 8th, 2024)\n\nCHANGES\n\n* Add reference use of Hashicorp Raft. [GH-584](https://github.com/hashicorp/raft/pull/584)\n* [COMPLIANCE] Add Copyright and License Headers. [GH-580](https://github.com/hashicorp/raft/pull/580)\n\nIMPROVEMENTS\n\n* Bump github.com/hashicorp/go-hclog from 1.5.0 to 1.6.2. [GH-583](https://github.com/hashicorp/raft/pull/583)\n\nBUG FIXES\n\n* Fix rare leadership transfer failures when writes happen during transfer. [GH-581](https://github.com/hashicorp/raft/pull/581)\n\n# 1.6.0 (November 15th, 2023)\n\nCHANGES\n\n* Upgrade hashicorp/go-msgpack to v2, with go.mod upgraded from v0.5.5 to v2.1.1. [GH-577](https://github.com/hashicorp/raft/pull/577)\n\n  go-msgpack v2.1.1 is by default binary compatible with v0.5.5 (\"non-builtin\" encoding of `time.Time`), but can decode messages produced by v1.1.5 as well (\"builtin\" encoding of `time.Time`).\n\n  However, if users of this library overrode the version of go-msgpack (especially to v1), this **could break** compatibility if raft nodes are running a mix of versions.\n\n  This compatibility can be configured at runtime in Raft using `NetworkTransportConfig.MsgpackUseNewTimeFormat` -- the default is `false`, which maintains compatibility with `go-msgpack` v0.5.5, but if set to `true`, will be compatible with `go-msgpack` v1.1.5.\n\nIMPROVEMENTS\n\n* Push to notify channel when shutting down. 
[GH-567](https://github.com/hashicorp/raft/pull/567)\n* Add CommitIndex API [GH-560](https://github.com/hashicorp/raft/pull/560)\n* Document some Apply error cases better [GH-561](https://github.com/hashicorp/raft/pull/561)\n\nBUG FIXES\n\n* Race with `candidateFromLeadershipTransfer` [GH-570](https://github.com/hashicorp/raft/pull/570)\n\n\n# 1.5.0 (April 21st, 2023)\n\nIMPROVEMENTS\n* Fixed a performance anomaly related to pipelining RPCs that caused large increases in commit latency under high write throughput. Default behavior has changed. For more information see #541.\n\n# 1.4.0 (March 17th, 2023)\n\nFEATURES\n* Support log stores with a monotonically increasing index.  Implementing a log store with the `MonotonicLogStore` interface where `IsMonotonic()` returns true will allow Raft to clear all previous logs on user restores of Raft snapshots.\n\nBUG FIXES\n* Restoring a snapshot with the raft-wal log store caused a panic due to index gap that is created during snapshot restores.\n\n# 1.3.0 (April 22nd, 2021)\n\nIMPROVEMENTS\n\n* Added metrics for `oldestLogAge` and `lastRestoreDuration` to monitor capacity issues that can cause unrecoverable cluster failure  [[GH-452](https://github.com/hashicorp/raft/pull/452)][[GH-454](https://github.com/hashicorp/raft/pull/454/files)]\n* Made `TrailingLogs`, `SnapshotInterval` and `SnapshotThreshold` reloadable at runtime using a new `ReloadConfig` method. This allows recovery from cases where there are not enough logs retained for followers to catchup after a restart. 
[[GH-444](https://github.com/hashicorp/raft/pull/444)]\n* Inclusify the repository by switching to main [[GH-446](https://github.com/hashicorp/raft/pull/446)]\n* Add option for a buffered `ApplyCh` if `MaxAppendEntries` is enabled [[GH-445](https://github.com/hashicorp/raft/pull/445)]\n* Add string to `LogType` for more human readable debugging [[GH-442](https://github.com/hashicorp/raft/pull/442)]\n* Extract fuzzy testing into its own module [[GH-459](https://github.com/hashicorp/raft/pull/459)]\n\nBUG FIXES\n* Update LogCache `StoreLogs()` to capture an error that would previously cause a panic [[GH-460](https://github.com/hashicorp/raft/pull/460)]\n\n# 1.2.0 (October 5th, 2020)\n\nIMPROVEMENTS\n\n* Remove `StartAsLeader` configuration option [[GH-386](https://github.com/hashicorp/raft/pull/386)]\n* Allow futures to react to `Shutdown()` to prevent a deadlock with `takeSnapshot()` [[GH-390](https://github.com/hashicorp/raft/pull/390)]\n* Prevent non-voters from becoming eligible for leadership elections [[GH-398](https://github.com/hashicorp/raft/pull/398)]\n* Remove an unneeded `io.Copy` from snapshot writes [[GH-399](https://github.com/hashicorp/raft/pull/399)]\n* Log decoded candidate address in `duplicate requestVote` warning [[GH-400](https://github.com/hashicorp/raft/pull/400)]\n* Prevent starting a TCP transport when IP address is `nil` [[GH-403](https://github.com/hashicorp/raft/pull/403)]\n* Reject leadership transfer requests when in candidate state to prevent indefinite blocking while unable to elect a leader [[GH-413](https://github.com/hashicorp/raft/pull/413)]\n* Add labels for metric metadata to reduce cardinality of metric names [[GH-409](https://github.com/hashicorp/raft/pull/409)]\n* Add peers metric [[GH-431](https://github.com/hashicorp/raft/pull/431)]\n\nBUG FIXES\n\n* Make `LeaderCh` always deliver the latest leadership transition [[GH-384](https://github.com/hashicorp/raft/pull/384)]\n* Handle updating an existing peer in 
`startStopReplication` [[GH-419](https://github.com/hashicorp/raft/pull/419)]\n\n# 1.1.2 (January 17th, 2020)\n\nFEATURES\n\n* Improve FSM apply performance through batching. Implementing the `BatchingFSM` interface enables this new feature [[GH-364](https://github.com/hashicorp/raft/pull/364)]\n* Add ability to obtain Raft configuration before Raft starts with GetConfiguration [[GH-369](https://github.com/hashicorp/raft/pull/369)]\n\nIMPROVEMENTS\n\n* Remove lint violations and add a `make` rule for running the linter.\n* Replace logger with hclog [[GH-360](https://github.com/hashicorp/raft/pull/360)]\n* Read latest configuration independently from main loop [[GH-379](https://github.com/hashicorp/raft/pull/379)]\n\nBUG FIXES\n\n* Export the leader field in LeaderObservation [[GH-357](https://github.com/hashicorp/raft/pull/357)]\n* Fix snapshot to not attempt to truncate a negative range [[GH-358](https://github.com/hashicorp/raft/pull/358)]\n* Check for shutdown in inmemPipeline before sending RPCs [[GH-276](https://github.com/hashicorp/raft/pull/276)]\n\n# 1.1.1 (July 23rd, 2019)\n\nFEATURES\n\n* Add support for extensions to be sent on log entries [[GH-353](https://github.com/hashicorp/raft/pull/353)]\n* Add config option to skip snapshot restore on startup [[GH-340](https://github.com/hashicorp/raft/pull/340)]\n* Add optional configuration store interface [[GH-339](https://github.com/hashicorp/raft/pull/339)]\n\nIMPROVEMENTS\n\n* Break out of group commit early when no logs are present [[GH-341](https://github.com/hashicorp/raft/pull/341)]\n\nBUGFIXES\n\n* Fix 64-bit counters on 32-bit platforms [[GH-344](https://github.com/hashicorp/raft/pull/344)]\n* Don't defer closing source in recover/restore operations since it's in a loop [[GH-337](https://github.com/hashicorp/raft/pull/337)]\n\n# 1.1.0 (May 23rd, 2019)\n\nFEATURES\n\n* Add transfer leadership extension [[GH-306](https://github.com/hashicorp/raft/pull/306)]\n\nIMPROVEMENTS\n\n* Move to `go mod` 
[[GH-323](https://github.com/hashicorp/raft/pull/323)]\n* Leveled log [[GH-321](https://github.com/hashicorp/raft/pull/321)]\n* Add peer changes to observations [[GH-326](https://github.com/hashicorp/raft/pull/326)]\n\nBUGFIXES\n\n* Copy the contents of an InmemSnapshotStore when opening a snapshot [[GH-270](https://github.com/hashicorp/raft/pull/270)]\n* Fix logging panic when converting parameters to strings [[GH-332](https://github.com/hashicorp/raft/pull/332)]\n\n# 1.0.1 (April 12th, 2019)\n\nIMPROVEMENTS\n\n* InMemTransport: Add timeout for sending a message [[GH-313](https://github.com/hashicorp/raft/pull/313)]\n* ensure 'make deps' downloads test dependencies like testify [[GH-310](https://github.com/hashicorp/raft/pull/310)]\n* Clarifies function of CommitTimeout [[GH-309](https://github.com/hashicorp/raft/pull/309)]\n* Add additional metrics regarding log dispatching and committal [[GH-316](https://github.com/hashicorp/raft/pull/316)]\n\n# 1.0.0 (October 3rd, 2017)\n\nv1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version manages server identities using a UUID, so introduces some breaking API changes. It also versions the Raft protocol, and requires some special steps when interoperating with Raft servers running older versions of the library (see the detailed comment in config.go about version compatibility). You can reference https://github.com/hashicorp/consul/pull/2222 for an idea of what was required to port Consul to these new interfaces.\n\n# 0.1.0 (September 29th, 2017)\n\nv0.1.0 is the original stable version of the library that was in main and has been maintained with no breaking API changes. This was in use by Consul prior to version 0.7.0.\n"
  },
  {
    "path": "LICENSE",
    "content": "Copyright IBM Corp. 2013, 2025\n\nMozilla Public License, version 2.0\n\n1. Definitions\n\n1.1. “Contributor”\n\n     means each individual or legal entity that creates, contributes to the\n     creation of, or owns Covered Software.\n\n1.2. “Contributor Version”\n\n     means the combination of the Contributions of others (if any) used by a\n     Contributor and that particular Contributor’s Contribution.\n\n1.3. “Contribution”\n\n     means Covered Software of a particular Contributor.\n\n1.4. “Covered Software”\n\n     means Source Code Form to which the initial Contributor has attached the\n     notice in Exhibit A, the Executable Form of such Source Code Form, and\n     Modifications of such Source Code Form, in each case including portions\n     thereof.\n\n1.5. “Incompatible With Secondary Licenses”\n     means\n\n     a. that the initial Contributor has attached the notice described in\n        Exhibit B to the Covered Software; or\n\n     b. that the Covered Software was made available under the terms of version\n        1.1 or earlier of the License, but not also under the terms of a\n        Secondary License.\n\n1.6. “Executable Form”\n\n     means any form of the work other than Source Code Form.\n\n1.7. “Larger Work”\n\n     means a work that combines Covered Software with other material, in a separate\n     file or files, that is not Covered Software.\n\n1.8. “License”\n\n     means this document.\n\n1.9. “Licensable”\n\n     means having the right to grant, to the maximum extent possible, whether at the\n     time of the initial grant or subsequently, any and all of the rights conveyed by\n     this License.\n\n1.10. “Modifications”\n\n     means any of the following:\n\n     a. any file in Source Code Form that results from an addition to, deletion\n        from, or modification of the contents of Covered Software; or\n\n     b. any new file in Source Code Form that contains any Covered Software.\n\n1.11. 
“Patent Claims” of a Contributor\n\n      means any patent claim(s), including without limitation, method, process,\n      and apparatus claims, in any patent Licensable by such Contributor that\n      would be infringed, but for the grant of the License, by the making,\n      using, selling, offering for sale, having made, import, or transfer of\n      either its Contributions or its Contributor Version.\n\n1.12. “Secondary License”\n\n      means either the GNU General Public License, Version 2.0, the GNU Lesser\n      General Public License, Version 2.1, the GNU Affero General Public\n      License, Version 3.0, or any later versions of those licenses.\n\n1.13. “Source Code Form”\n\n      means the form of the work preferred for making modifications.\n\n1.14. “You” (or “Your”)\n\n      means an individual or a legal entity exercising rights under this\n      License. For legal entities, “You” includes any entity that controls, is\n      controlled by, or is under common control with You. For purposes of this\n      definition, “control” means (a) the power, direct or indirect, to cause\n      the direction or management of such entity, whether by contract or\n      otherwise, or (b) ownership of more than fifty percent (50%) of the\n      outstanding shares or beneficial ownership of such entity.\n\n\n2. License Grants and Conditions\n\n2.1. Grants\n\n     Each Contributor hereby grants You a world-wide, royalty-free,\n     non-exclusive license:\n\n     a. under intellectual property rights (other than patent or trademark)\n        Licensable by such Contributor to use, reproduce, make available,\n        modify, display, perform, distribute, and otherwise exploit its\n        Contributions, either on an unmodified basis, with Modifications, or as\n        part of a Larger Work; and\n\n     b. 
under Patent Claims of such Contributor to make, use, sell, offer for\n        sale, have made, import, and otherwise transfer either its Contributions\n        or its Contributor Version.\n\n2.2. Effective Date\n\n     The licenses granted in Section 2.1 with respect to any Contribution become\n     effective for each Contribution on the date the Contributor first distributes\n     such Contribution.\n\n2.3. Limitations on Grant Scope\n\n     The licenses granted in this Section 2 are the only rights granted under this\n     License. No additional rights or licenses will be implied from the distribution\n     or licensing of Covered Software under this License. Notwithstanding Section\n     2.1(b) above, no patent license is granted by a Contributor:\n\n     a. for any code that a Contributor has removed from Covered Software; or\n\n     b. for infringements caused by: (i) Your and any other third party’s\n        modifications of Covered Software, or (ii) the combination of its\n        Contributions with other software (except as part of its Contributor\n        Version); or\n\n     c. under Patent Claims infringed by Covered Software in the absence of its\n        Contributions.\n\n     This License does not grant any rights in the trademarks, service marks, or\n     logos of any Contributor (except as may be necessary to comply with the\n     notice requirements in Section 3.4).\n\n2.4. Subsequent Licenses\n\n     No Contributor makes additional grants as a result of Your choice to\n     distribute the Covered Software under a subsequent version of this License\n     (see Section 10.2) or under the terms of a Secondary License (if permitted\n     under the terms of Section 3.3).\n\n2.5. Representation\n\n     Each Contributor represents that the Contributor believes its Contributions\n     are its original creation(s) or it has sufficient rights to grant the\n     rights to its Contributions conveyed by this License.\n\n2.6. 
Fair Use\n\n     This License is not intended to limit any rights You have under applicable\n     copyright doctrines of fair use, fair dealing, or other equivalents.\n\n2.7. Conditions\n\n     Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in\n     Section 2.1.\n\n\n3. Responsibilities\n\n3.1. Distribution of Source Form\n\n     All distribution of Covered Software in Source Code Form, including any\n     Modifications that You create or to which You contribute, must be under the\n     terms of this License. You must inform recipients that the Source Code Form\n     of the Covered Software is governed by the terms of this License, and how\n     they can obtain a copy of this License. You may not attempt to alter or\n     restrict the recipients’ rights in the Source Code Form.\n\n3.2. Distribution of Executable Form\n\n     If You distribute Covered Software in Executable Form then:\n\n     a. such Covered Software must also be made available in Source Code Form,\n        as described in Section 3.1, and You must inform recipients of the\n        Executable Form how they can obtain a copy of such Source Code Form by\n        reasonable means in a timely manner, at a charge no more than the cost\n        of distribution to the recipient; and\n\n     b. You may distribute such Executable Form under the terms of this License,\n        or sublicense it under different terms, provided that the license for\n        the Executable Form does not attempt to limit or alter the recipients’\n        rights in the Source Code Form under this License.\n\n3.3. Distribution of a Larger Work\n\n     You may create and distribute a Larger Work under terms of Your choice,\n     provided that You also comply with the requirements of this License for the\n     Covered Software. 
If the Larger Work is a combination of Covered Software\n     with a work governed by one or more Secondary Licenses, and the Covered\n     Software is not Incompatible With Secondary Licenses, this License permits\n     You to additionally distribute such Covered Software under the terms of\n     such Secondary License(s), so that the recipient of the Larger Work may, at\n     their option, further distribute the Covered Software under the terms of\n     either this License or such Secondary License(s).\n\n3.4. Notices\n\n     You may not remove or alter the substance of any license notices (including\n     copyright notices, patent notices, disclaimers of warranty, or limitations\n     of liability) contained within the Source Code Form of the Covered\n     Software, except that You may alter any license notices to the extent\n     required to remedy known factual inaccuracies.\n\n3.5. Application of Additional Terms\n\n     You may choose to offer, and to charge a fee for, warranty, support,\n     indemnity or liability obligations to one or more recipients of Covered\n     Software. However, You may do so only on Your own behalf, and not on behalf\n     of any Contributor. You must make it absolutely clear that any such\n     warranty, support, indemnity, or liability obligation is offered by You\n     alone, and You hereby agree to indemnify every Contributor for any\n     liability incurred by such Contributor as a result of warranty, support,\n     indemnity or liability terms You offer. You may include additional\n     disclaimers of warranty and limitations of liability specific to any\n     jurisdiction.\n\n4. 
Inability to Comply Due to Statute or Regulation\n\n   If it is impossible for You to comply with any of the terms of this License\n   with respect to some or all of the Covered Software due to statute, judicial\n   order, or regulation then You must: (a) comply with the terms of this License\n   to the maximum extent possible; and (b) describe the limitations and the code\n   they affect. Such description must be placed in a text file included with all\n   distributions of the Covered Software under this License. Except to the\n   extent prohibited by statute or regulation, such description must be\n   sufficiently detailed for a recipient of ordinary skill to be able to\n   understand it.\n\n5. Termination\n\n5.1. The rights granted under this License will terminate automatically if You\n     fail to comply with any of its terms. However, if You become compliant,\n     then the rights granted under this License from a particular Contributor\n     are reinstated (a) provisionally, unless and until such Contributor\n     explicitly and finally terminates Your grants, and (b) on an ongoing basis,\n     if such Contributor fails to notify You of the non-compliance by some\n     reasonable means prior to 60 days after You have come back into compliance.\n     Moreover, Your grants from a particular Contributor are reinstated on an\n     ongoing basis if such Contributor notifies You of the non-compliance by\n     some reasonable means, this is the first time You have received notice of\n     non-compliance with this License from such Contributor, and You become\n     compliant prior to 30 days after Your receipt of the notice.\n\n5.2. 
If You initiate litigation against any entity by asserting a patent\n     infringement claim (excluding declaratory judgment actions, counter-claims,\n     and cross-claims) alleging that a Contributor Version directly or\n     indirectly infringes any patent, then the rights granted to You by any and\n     all Contributors for the Covered Software under Section 2.1 of this License\n     shall terminate.\n\n5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user\n     license agreements (excluding distributors and resellers) which have been\n     validly granted by You or Your distributors under this License prior to\n     termination shall survive termination.\n\n6. Disclaimer of Warranty\n\n   Covered Software is provided under this License on an “as is” basis, without\n   warranty of any kind, either expressed, implied, or statutory, including,\n   without limitation, warranties that the Covered Software is free of defects,\n   merchantable, fit for a particular purpose or non-infringing. The entire\n   risk as to the quality and performance of the Covered Software is with You.\n   Should any Covered Software prove defective in any respect, You (not any\n   Contributor) assume the cost of any necessary servicing, repair, or\n   correction. This disclaimer of warranty constitutes an essential part of this\n   License. No use of  any Covered Software is authorized under this License\n   except under this disclaimer.\n\n7. 
Limitation of Liability\n\n   Under no circumstances and under no legal theory, whether tort (including\n   negligence), contract, or otherwise, shall any Contributor, or anyone who\n   distributes Covered Software as permitted above, be liable to You for any\n   direct, indirect, special, incidental, or consequential damages of any\n   character including, without limitation, damages for lost profits, loss of\n   goodwill, work stoppage, computer failure or malfunction, or any and all\n   other commercial damages or losses, even if such party shall have been\n   informed of the possibility of such damages. This limitation of liability\n   shall not apply to liability for death or personal injury resulting from such\n   party’s negligence to the extent applicable law prohibits such limitation.\n   Some jurisdictions do not allow the exclusion or limitation of incidental or\n   consequential damages, so this exclusion and limitation may not apply to You.\n\n8. Litigation\n\n   Any litigation relating to this License may be brought only in the courts of\n   a jurisdiction where the defendant maintains its principal place of business\n   and such litigation shall be governed by laws of that jurisdiction, without\n   reference to its conflict-of-law provisions. Nothing in this Section shall\n   prevent a party’s ability to bring cross-claims or counter-claims.\n\n9. Miscellaneous\n\n   This License represents the complete agreement concerning the subject matter\n   hereof. If any provision of this License is held to be unenforceable, such\n   provision shall be reformed only to the extent necessary to make it\n   enforceable. Any law or regulation which provides that the language of a\n   contract shall be construed against the drafter shall not be used to construe\n   this License against a Contributor.\n\n\n10. Versions of the License\n\n10.1. New Versions\n\n      Mozilla Foundation is the license steward. 
Except as provided in Section\n      10.3, no one other than the license steward has the right to modify or\n      publish new versions of this License. Each version will be given a\n      distinguishing version number.\n\n10.2. Effect of New Versions\n\n      You may distribute the Covered Software under the terms of the version of\n      the License under which You originally received the Covered Software, or\n      under the terms of any subsequent version published by the license\n      steward.\n\n10.3. Modified Versions\n\n      If you create software not governed by this License, and you want to\n      create a new license for such software, you may create and use a modified\n      version of this License if you rename the license and remove any\n      references to the name of the license steward (except to note that such\n      modified license differs from this License).\n\n10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses\n      If You choose to distribute Source Code Form that is Incompatible With\n      Secondary Licenses under the terms of this version of the License, the\n      notice described in Exhibit B of this License must be attached.\n\nExhibit A - Source Code Form License Notice\n\n      This Source Code Form is subject to the\n      terms of the Mozilla Public License, v.\n      2.0. If a copy of the MPL was not\n      distributed with this file, You can\n      obtain one at\n      http://mozilla.org/MPL/2.0/.\n\nIf it is not possible or desirable to put the notice in a particular file, then\nYou may include the notice in a location (such as a LICENSE file in a relevant\ndirectory) where a recipient would be likely to look for such a notice.\n\nYou may add additional accurate notices of copyright ownership.\n\nExhibit B - “Incompatible With Secondary Licenses” Notice\n\n      This Source Code Form is “Incompatible\n      With Secondary Licenses”, as defined by\n      the Mozilla Public License, v. 2.0.\n\n"
  },
  {
    "path": "Makefile",
    "content": "DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)\nENV  = $(shell go env GOPATH)\nGO_VERSION  = $(shell go version)\nGOLANG_CI_VERSION = v1.19.0\n\n# Look for versions prior to 1.10 which have a different fmt output\n# and don't lint with gofmt against them.\nifneq (,$(findstring go version go1.8, $(GO_VERSION)))\n\tFMT=\nelse ifneq (,$(findstring go version go1.9, $(GO_VERSION)))\n\tFMT=\nelse\n    FMT=--enable gofmt\nendif\n\nTEST_RESULTS_DIR?=/tmp/test-results\n\ntest:\n\tGOTRACEBACK=all go test $(TESTARGS) -timeout=180s -race .\n\tGOTRACEBACK=all go test $(TESTARGS) -timeout=180s -tags batchtest -race .\n\ninteg: test\n\tINTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -run=Integ .\n\tINTEG_TESTS=yes go test $(TESTARGS) -timeout=60s -tags batchtest -run=Integ .\n\nfuzz:\n\tcd ./fuzzy && go test $(TESTARGS) -timeout=20m .\n\tcd ./fuzzy && go test $(TESTARGS) -timeout=20m -tags batchtest .\n\ndeps:\n\tgo get -t -d -v ./...\n\techo $(DEPS) | xargs -n1 go get -d\n\nlint:\n\tgofmt -s -w .\n\tgolangci-lint run -c .golangci-lint.yml $(FMT) .\n\ndep-linter:\n\tcurl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(ENV)/bin $(GOLANG_CI_VERSION)\n\ncov:\n\tINTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html\n\topen /tmp/coverage.html\n\n.PHONY: test cov integ deps dep-linter lint\n"
  },
  {
    "path": "README.md",
    "content": "raft [![Build Status](https://github.com/hashicorp/raft/workflows/ci/badge.svg)](https://github.com/hashicorp/raft/actions)\n[![Go Reference](https://pkg.go.dev/badge/github.com/hashicorp/raft.svg)](https://pkg.go.dev/github.com/hashicorp/raft)\n[![Go Report Card](https://goreportcard.com/badge/github.com/hashicorp/raft)](https://goreportcard.com/report/github.com/hashicorp/raft)\n[![License: MPL 2.0](https://img.shields.io/badge/License-MPL%202.0-brightgreen.svg)](https://opensource.org/licenses/MPL-2.0)\n[![Build Status](https://github.com/hashicorp/raft/actions/workflows/ci.yml/badge.svg)](https://github.com/hashicorp/raft/actions)\n[![Release](https://img.shields.io/github/v/release/hashicorp/raft)](https://github.com/hashicorp/raft/releases)\n[![Issues](https://img.shields.io/github/issues/hashicorp/raft)](https://github.com/hashicorp/raft/issues)\n[![Pull Requests](https://img.shields.io/github/issues-pr/hashicorp/raft)](https://github.com/hashicorp/raft/pulls)\n====\n\nraft is a [Go](http://www.golang.org) library that manages a replicated\nlog and can be used with an FSM to manage replicated state machines. It\nis a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).\n\nThe use cases for such a library are far-reaching, such as replicated state\nmachines which are a key component of many distributed systems. They enable\nbuilding Consistent, Partition Tolerant (CP) systems, with limited\nfault tolerance as well.\n\n## Building\n\nIf you wish to build raft you'll need Go version 1.16+ installed.\n\nPlease check your installation with:\n\n```\ngo version\n```\n\n## Documentation\n\nFor complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).\n\nTo prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,\ncalled [raft-mdb](http://github.com/hashicorp/raft-mdb). 
That is the recommended implementation\nfor the `LogStore` and `StableStore`.\n\nA pure Go backend using [Bbolt](https://github.com/etcd-io/bbolt) is also available called\n[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`\nand `StableStore`.\n\n\n## Community Contributed Examples \n- [Raft gRPC Example](https://github.com/Jille/raft-grpc-example) - Utilizing the Raft repository with gRPC\n- [Raft-based KV-store Example](https://github.com/otoolep/hraftd) - Uses Hashicorp Raft to build a distributed key-value store\n\n\n## Tagged Releases\n\nAs of September 2017, HashiCorp will start using tags for this library to clearly indicate\nmajor version updates. We recommend you vendor your application's dependency on this library.\n\n* v0.1.0 is the original stable version of the library that was in main and has been maintained\nwith no breaking API changes. This was in use by Consul prior to version 0.7.0.\n\n* v1.0.0 takes the changes that were staged in the library-v2-stage-one branch. This version\nmanages server identities using a UUID, so introduces some breaking API changes. It also versions\nthe Raft protocol, and requires some special steps when interoperating with Raft servers running\nolder versions of the library (see the detailed comment in config.go about version compatibility).\nYou can reference https://github.com/hashicorp/consul/pull/2222 for an idea of what was required\nto port Consul to these new interfaces.\n\n    This version includes some new features as well, including non voting servers, a new address\n    provider abstraction in the transport layer, and more resilient snapshots.\n\n## Protocol\n\nraft is based on [\"Raft: In Search of an Understandable Consensus Algorithm\"](https://raft.github.io/raft.pdf)\n\nA high level overview of the Raft protocol is described below, but for details please read the full\n[Raft paper](https://raft.github.io/raft.pdf)\nfollowed by the raft source. 
Any questions about the raft protocol should be sent to the\n[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).\n\n### Protocol Description\n\nRaft nodes are always in one of three states: follower, candidate or leader. All\nnodes initially start out as a follower. In this state, nodes can accept log entries\nfrom a leader and cast votes. If no entries are received for some time, nodes\nself-promote to the candidate state. In the candidate state nodes request votes from\ntheir peers. If a candidate receives a quorum of votes, then it is promoted to a leader.\nThe leader must accept new log entries and replicate to all the other followers.\nIn addition, if stale reads are not acceptable, all queries must also be performed on\nthe leader.\n\nOnce a cluster has a leader, it is able to accept new log entries. A client can\nrequest that a leader append a new log entry, which is an opaque binary blob to\nRaft. The leader then writes the entry to durable storage and attempts to replicate\nto a quorum of followers. Once the log entry is considered *committed*, it can be\n*applied* to a finite state machine. The finite state machine is application specific,\nand is implemented using an interface.\n\nAn obvious question relates to the unbounded nature of a replicated log. Raft provides\na mechanism by which the current state is snapshotted, and the log is compacted. Because\nof the FSM abstraction, restoring the state of the FSM must result in the same state\nas a replay of old logs. This allows Raft to capture the FSM state at a point in time,\nand then remove all the logs that were used to reach that state. This is performed automatically\nwithout user intervention, and prevents unbounded disk usage as well as minimizing\ntime spent replaying logs.\n\nLastly, there is the issue of updating the peer set when new servers are joining\nor existing servers are leaving. 
As long as a quorum of nodes is available, this\nis not an issue as Raft provides mechanisms to dynamically update the peer set.\nIf a quorum of nodes is unavailable, then this becomes a very challenging issue.\nFor example, suppose there are only 2 peers, A and B. The quorum size is also\n2, meaning both nodes must agree to commit a log entry. If either A or B fails,\nit is now impossible to reach quorum. This means the cluster is unable to add,\nor remove a node, or commit any additional log entries. This results in *unavailability*.\nAt this point, manual intervention would be required to remove either A or B,\nand to restart the remaining node in bootstrap mode.\n\nA Raft cluster of 3 nodes can tolerate a single node failure, while a cluster\nof 5 can tolerate 2 node failures. The recommended configuration is to either\nrun 3 or 5 raft servers. This maximizes availability without\ngreatly sacrificing performance.\n\nIn terms of performance, Raft is comparable to Paxos. Assuming stable leadership,\ncommitting a log entry requires a single round trip to half of the cluster.\nThus performance is bound by disk I/O and network latency.\n\n\n  ## Metrics Emission and Compatibility\n\n  This library can emit metrics using either `github.com/armon/go-metrics` or `github.com/hashicorp/go-metrics`. Choosing between the libraries is controlled via build tags. \n\n  **Build Tags**\n  * `armonmetrics` - Using this tag will cause metrics to be routed to `armon/go-metrics`\n  * `hashicorpmetrics` - Using this tag will cause all metrics to be routed to `hashicorp/go-metrics`\n\n  If no build tag is specified, the default behavior is to use `armon/go-metrics`. \n\n  **Deprecating `armon/go-metrics`**\n\n  Emitting metrics to `armon/go-metrics` is officially deprecated. 
Usage of `armon/go-metrics` will remain the default until mid-2025, with opt-in support continuing to the end of 2025.\n\n  **Migration**\n\n  To migrate an application currently using the older `armon/go-metrics` to instead use `hashicorp/go-metrics`, the following should be done.\n\n  1. Upgrade libraries using `armon/go-metrics` to consume `hashicorp/go-metrics/compat` instead. This should involve only changing import statements. All repositories in the `hashicorp` namespace\n  2. Update the application's library dependencies to those that have the compatibility layer configured.\n  3. Update the application to use `hashicorp/go-metrics` for configuring metrics export instead of `armon/go-metrics`\n     * Replace all application imports of `github.com/armon/go-metrics` with `github.com/hashicorp/go-metrics`\n     * Instrument your build system to build with the `hashicorpmetrics` tag.\n\n  Eventually, once the default behavior changes to use `hashicorp/go-metrics` (mid-2025), you can drop the `hashicorpmetrics` build tag.\n"
  },
  {
    "path": "api.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"strconv\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\thclog \"github.com/hashicorp/go-hclog\"\n\tmetrics \"github.com/hashicorp/go-metrics/compat\"\n)\n\nconst (\n\t// SuggestedMaxDataSize of the data in a raft log entry, in bytes.\n\t//\n\t// The value is based on current architecture, default timing, etc. Clients can\n\t// ignore this value if they want as there is no actual hard checking\n\t// within the library. As the library is enhanced this value may change\n\t// over time to reflect current suggested maximums.\n\t//\n\t// Applying log entries with data greater than this size risks RPC IO taking\n\t// too long and preventing timely heartbeat signals.  These signals are sent in serial\n\t// in current transports, potentially causing leadership instability.\n\tSuggestedMaxDataSize = 512 * 1024\n)\n\nvar (\n\t// ErrLeader is returned when an operation can't be completed on a\n\t// leader node.\n\tErrLeader = errors.New(\"node is the leader\")\n\n\t// ErrNotLeader is returned when an operation can't be completed on a\n\t// follower or candidate node.\n\tErrNotLeader = errors.New(\"node is not the leader\")\n\n\t// ErrNotVoter is returned when an operation can't be completed on a\n\t// non-voter node.\n\tErrNotVoter = errors.New(\"node is not a voter\")\n\n\t// ErrLeadershipLost is returned when a leader fails to commit a log entry\n\t// because it's been deposed in the process.\n\tErrLeadershipLost = errors.New(\"leadership lost while committing log\")\n\n\t// ErrAbortedByRestore is returned when a leader fails to commit a log\n\t// entry because it's been superseded by a user snapshot restore.\n\tErrAbortedByRestore = errors.New(\"snapshot restored while committing log\")\n\n\t// ErrRaftShutdown is returned when operations are requested against an\n\t// inactive Raft.\n\tErrRaftShutdown = 
errors.New(\"raft is already shutdown\")\n\n\t// ErrEnqueueTimeout is returned when a command fails due to a timeout.\n\tErrEnqueueTimeout = errors.New(\"timed out enqueuing operation\")\n\n\t// ErrNothingNewToSnapshot is returned when trying to create a snapshot\n\t// but there's nothing new committed to the FSM since we started.\n\tErrNothingNewToSnapshot = errors.New(\"nothing new to snapshot\")\n\n\t// ErrUnsupportedProtocol is returned when an operation is attempted\n\t// that's not supported by the current protocol version.\n\tErrUnsupportedProtocol = errors.New(\"operation not supported with current protocol version\")\n\n\t// ErrCantBootstrap is returned when attempt is made to bootstrap a\n\t// cluster that already has state present.\n\tErrCantBootstrap = errors.New(\"bootstrap only works on new clusters\")\n\n\t// ErrLeadershipTransferInProgress is returned when the leader is rejecting\n\t// client requests because it is attempting to transfer leadership.\n\tErrLeadershipTransferInProgress = errors.New(\"leadership transfer in progress\")\n)\n\n// Raft implements a Raft node.\ntype Raft struct {\n\traftState\n\n\t// protocolVersion is used to inter-operate with Raft servers running\n\t// different versions of the library. See comments in config.go for more\n\t// details.\n\tprotocolVersion ProtocolVersion\n\n\t// applyCh is used to async send logs to the main thread to\n\t// be committed and applied to the FSM.\n\tapplyCh chan *logFuture\n\n\t// conf stores the current configuration to use. This is the most recent one\n\t// provided. All reads of config values should use the config() helper method\n\t// to read this safely.\n\tconf atomic.Value\n\n\t// confReloadMu ensures that only one thread can reload config at once since\n\t// we need to read-modify-write the atomic. It is NOT necessary to hold this\n\t// for any other operation e.g. 
reading config using config().\n\tconfReloadMu sync.Mutex\n\n\t// FSM is the client state machine to apply commands to\n\tfsm FSM\n\n\t// fsmMutateCh is used to send state-changing updates to the FSM. This\n\t// receives pointers to commitTuple structures when applying logs or\n\t// pointers to restoreFuture structures when restoring a snapshot. We\n\t// need control over the order of these operations when doing user\n\t// restores so that we finish applying any old log applies before we\n\t// take a user snapshot on the leader, otherwise we might restore the\n\t// snapshot and apply old logs to it that were in the pipe.\n\tfsmMutateCh chan interface{}\n\n\t// fsmSnapshotCh is used to trigger a new snapshot being taken\n\tfsmSnapshotCh chan *reqSnapshotFuture\n\n\t// lastContact is the last time we had contact from the\n\t// leader node. This can be used to gauge staleness.\n\tlastContact     time.Time\n\tlastContactLock sync.RWMutex\n\n\t// leaderAddr is the current cluster leader Address\n\tleaderAddr ServerAddress\n\t// LeaderID is the current cluster leader ID\n\tleaderID   ServerID\n\tleaderLock sync.RWMutex\n\n\t// leaderCh is used to notify of leadership changes\n\tleaderCh chan bool\n\n\t// leaderState used only while state is leader\n\tleaderState leaderState\n\n\t// candidateFromLeadershipTransfer is used to indicate that this server became\n\t// candidate because the leader tries to transfer leadership. 
This flag is\n\t// used in RequestVoteRequest to express that a leadership transfer is going\n\t// on.\n\tcandidateFromLeadershipTransfer atomic.Bool\n\n\t// Stores our local server ID, used to avoid sending RPCs to ourself\n\tlocalID ServerID\n\n\t// Stores our local addr\n\tlocalAddr ServerAddress\n\n\t// Used for our logging\n\tlogger hclog.Logger\n\n\t// LogStore provides durable storage for logs\n\tlogs LogStore\n\n\t// Used to request the leader to make configuration changes.\n\tconfigurationChangeCh chan *configurationChangeFuture\n\n\t// Tracks the latest configuration and latest committed configuration from\n\t// the log/snapshot.\n\tconfigurations configurations\n\n\t// Holds a copy of the latest configuration which can be read independently\n\t// of the main loop.\n\tlatestConfiguration atomic.Value\n\n\t// RPC chan comes from the transport layer\n\trpcCh <-chan RPC\n\n\t// Shutdown channel to exit, protected to prevent concurrent exits\n\tshutdown     bool\n\tshutdownCh   chan struct{}\n\tshutdownLock sync.Mutex\n\n\t// snapshots is used to store and retrieve snapshots\n\tsnapshots SnapshotStore\n\n\t// userSnapshotCh is used for user-triggered snapshots\n\tuserSnapshotCh chan *userSnapshotFuture\n\n\t// userRestoreCh is used for user-triggered restores of external\n\t// snapshots\n\tuserRestoreCh chan *userRestoreFuture\n\n\t// stable is a StableStore implementation for durable state\n\t// It provides stable storage for many fields in raftState\n\tstable StableStore\n\n\t// The transport layer we use\n\ttrans Transport\n\n\t// verifyCh is used to async send verify futures to the main thread\n\t// to verify we are still the leader\n\tverifyCh chan *verifyFuture\n\n\t// configurationsCh is used to get the configuration data safely from\n\t// outside of the main thread.\n\tconfigurationsCh chan *configurationsFuture\n\n\t// bootstrapCh is used to attempt an initial bootstrap from outside of\n\t// the main thread.\n\tbootstrapCh chan 
*bootstrapFuture\n\n\t// List of observers and the mutex that protects them. The observers list\n\t// is indexed by an artificial ID which is used for deregistration.\n\tobserversLock sync.RWMutex\n\tobservers     map[uint64]*Observer\n\n\t// leadershipTransferCh is used to start a leadership transfer from outside of\n\t// the main thread.\n\tleadershipTransferCh chan *leadershipTransferFuture\n\n\t// leaderNotifyCh is used to tell leader that config has changed\n\tleaderNotifyCh chan struct{}\n\n\t// followerNotifyCh is used to tell followers that config has changed\n\tfollowerNotifyCh chan struct{}\n\n\t// mainThreadSaturation measures the saturation of the main raft goroutine.\n\tmainThreadSaturation *saturationMetric\n\n\t// preVoteDisabled control if the pre-vote feature is activated,\n\t// prevote feature is disabled if set to true.\n\tpreVoteDisabled bool\n\n\t// noLegacyTelemetry allows to skip the legacy metrics to avoid duplicates.\n\t// legacy metrics are those that have `_peer_name` as metric suffix instead as labels.\n\t// e.g: raft_replication_heartbeat_peer0\n\tnoLegacyTelemetry bool\n}\n\n// BootstrapCluster initializes a server's storage with the given cluster\n// configuration. This should only be called at the beginning of time for the\n// cluster with an identical configuration listing all Voter servers. There is\n// no need to bootstrap Nonvoter and Staging servers.\n//\n// A cluster can only be bootstrapped once from a single participating Voter\n// server. 
Any further attempts to bootstrap will return an error that can be\n// safely ignored.\n//\n// One approach is to bootstrap a single server with a configuration\n// listing just itself as a Voter, then invoke AddVoter() on it to add other\n// servers to the cluster.\nfunc BootstrapCluster(conf *Config, logs LogStore, stable StableStore,\n\tsnaps SnapshotStore, trans Transport, configuration Configuration,\n) error {\n\t// Validate the Raft server config.\n\tif err := ValidateConfig(conf); err != nil {\n\t\treturn err\n\t}\n\n\t// Sanity check the Raft peer configuration.\n\tif err := checkConfiguration(configuration); err != nil {\n\t\treturn err\n\t}\n\n\t// Make sure the cluster is in a clean state.\n\thasState, err := HasExistingState(logs, stable, snaps)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to check for existing state: %v\", err)\n\t}\n\tif hasState {\n\t\treturn ErrCantBootstrap\n\t}\n\n\t// Set current term to 1.\n\tif err := stable.SetUint64(keyCurrentTerm, 1); err != nil {\n\t\treturn fmt.Errorf(\"failed to save current term: %v\", err)\n\t}\n\n\t// Append configuration entry to log.\n\tentry := &Log{\n\t\tIndex: 1,\n\t\tTerm:  1,\n\t}\n\tif conf.ProtocolVersion < 3 {\n\t\tentry.Type = LogRemovePeerDeprecated\n\t\tentry.Data = encodePeers(configuration, trans)\n\t} else {\n\t\tentry.Type = LogConfiguration\n\t\tentry.Data = EncodeConfiguration(configuration)\n\t}\n\tif err := logs.StoreLog(entry); err != nil {\n\t\treturn fmt.Errorf(\"failed to append configuration entry to log: %v\", err)\n\t}\n\n\treturn nil\n}\n\n// RecoverCluster is used to manually force a new configuration in order to\n// recover from a loss of quorum where the current configuration cannot be\n// restored, such as when several servers die at the same time. This works by\n// reading all the current state for this server, creating a snapshot with the\n// supplied configuration, and then truncating the Raft log. 
This is the only\n// safe way to force a given configuration without actually altering the log to\n// insert any new entries, which could cause conflicts with other servers with\n// different state.\n//\n// WARNING! This operation implicitly commits all entries in the Raft log, so\n// in general this is an extremely unsafe operation. If you've lost your other\n// servers and are performing a manual recovery, then you've also lost the\n// commit information, so this is likely the best you can do, but you should be\n// aware that calling this can cause Raft log entries that were in the process\n// of being replicated but not yet committed to be committed.\n//\n// Note the FSM passed here is used for the snapshot operations and will be\n// left in a state that should not be used by the application. Be sure to\n// discard this FSM and any associated state and provide a fresh one when\n// calling NewRaft later.\n//\n// A typical way to recover the cluster is to shut down all servers and then\n// run RecoverCluster on every server using an identical configuration. When\n// the cluster is then restarted, an election should occur and then Raft will\n// resume normal operation. If it's desired to make a particular server the\n// leader, this can be used to inject a new configuration with that server as\n// the sole voter, and then join up other new clean-state peer servers using\n// the usual APIs in order to bring the cluster back into a known state.\nfunc RecoverCluster(conf *Config, fsm FSM, logs LogStore, stable StableStore,\n\tsnaps SnapshotStore, trans Transport, configuration Configuration,\n) error {\n\t// Validate the Raft server config.\n\tif err := ValidateConfig(conf); err != nil {\n\t\treturn err\n\t}\n\n\t// Sanity check the Raft peer configuration.\n\tif err := checkConfiguration(configuration); err != nil {\n\t\treturn err\n\t}\n\n\t// Refuse to recover if there's no existing state. 
This would be safe to\n\t// do, but it is likely an indication of an operator error where they\n\t// expect data to be there and it's not. By refusing, we force them\n\t// to show intent to start a cluster fresh by explicitly doing a\n\t// bootstrap, rather than quietly fire up a fresh cluster here.\n\tif hasState, err := HasExistingState(logs, stable, snaps); err != nil {\n\t\treturn fmt.Errorf(\"failed to check for existing state: %v\", err)\n\t} else if !hasState {\n\t\treturn fmt.Errorf(\"refused to recover cluster with no initial state, this is probably an operator error\")\n\t}\n\n\t// Attempt to restore any snapshots we find, newest to oldest.\n\tvar (\n\t\tsnapshotIndex  uint64\n\t\tsnapshotTerm   uint64\n\t\tsnapshots, err = snaps.List()\n\t)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to list snapshots: %v\", err)\n\t}\n\n\tlogger := conf.getOrCreateLogger()\n\n\tfor _, snapshot := range snapshots {\n\t\tvar source io.ReadCloser\n\t\t_, source, err = snaps.Open(snapshot.ID)\n\t\tif err != nil {\n\t\t\t// Skip this one and try the next. We will detect if we\n\t\t\t// couldn't open any snapshots.\n\t\t\tcontinue\n\t\t}\n\n\t\t// Note this is the one place we call fsm.Restore without the\n\t\t// fsmRestoreAndMeasure wrapper since this function should only be called to\n\t\t// reset state on disk and the FSM passed will not be used for a running\n\t\t// server instance. 
If the same process will eventually become a Raft peer\n\t\t// then it will call NewRaft and restore again from disk then which will\n\t\t// report metrics.\n\t\tsnapLogger := logger.With(\n\t\t\t\"id\", snapshot.ID,\n\t\t\t\"last-index\", snapshot.Index,\n\t\t\t\"last-term\", snapshot.Term,\n\t\t\t\"size-in-bytes\", snapshot.Size,\n\t\t)\n\t\tcrc := newCountingReadCloser(source)\n\t\tmonitor := startSnapshotRestoreMonitor(snapLogger, crc, snapshot.Size, false)\n\t\terr = fsm.Restore(crc)\n\t\t// Close the source after the restore has completed\n\t\t_ = source.Close()\n\t\tmonitor.StopAndWait()\n\t\tif err != nil {\n\t\t\t// Same here, skip and try the next one.\n\t\t\tcontinue\n\t\t}\n\n\t\tsnapshotIndex = snapshot.Index\n\t\tsnapshotTerm = snapshot.Term\n\t\tbreak\n\t}\n\tif len(snapshots) > 0 && (snapshotIndex == 0 || snapshotTerm == 0) {\n\t\treturn fmt.Errorf(\"failed to restore any of the available snapshots\")\n\t}\n\n\t// The snapshot information is the best known end point for the data\n\t// until we play back the Raft log entries.\n\tlastIndex := snapshotIndex\n\tlastTerm := snapshotTerm\n\n\t// Apply any Raft log entries past the snapshot.\n\tlastLogIndex, err := logs.LastIndex()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to find last log: %v\", err)\n\t}\n\tfor index := snapshotIndex + 1; index <= lastLogIndex; index++ {\n\t\tvar entry Log\n\t\tif err = logs.GetLog(index, &entry); err != nil {\n\t\t\treturn fmt.Errorf(\"failed to get log at index %d: %v\", index, err)\n\t\t}\n\t\tif entry.Type == LogCommand {\n\t\t\t_ = fsm.Apply(&entry)\n\t\t}\n\t\tlastIndex = entry.Index\n\t\tlastTerm = entry.Term\n\t}\n\n\t// Create a new snapshot, placing the configuration in as if it was\n\t// committed at index 1.\n\tsnapshot, err := fsm.Snapshot()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to snapshot FSM: %v\", err)\n\t}\n\tversion := getSnapshotVersion(conf.ProtocolVersion)\n\tsink, err := snaps.Create(version, lastIndex, lastTerm, 
configuration, 1, trans)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to create snapshot: %v\", err)\n\t}\n\tif err = snapshot.Persist(sink); err != nil {\n\t\treturn fmt.Errorf(\"failed to persist snapshot: %v\", err)\n\t}\n\tif err = sink.Close(); err != nil {\n\t\treturn fmt.Errorf(\"failed to finalize snapshot: %v\", err)\n\t}\n\n\t// Compact the log so that we don't get bad interference from any\n\t// configuration change log entries that might be there.\n\tfirstLogIndex, err := logs.FirstIndex()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to get first log index: %v\", err)\n\t}\n\tif err := logs.DeleteRange(firstLogIndex, lastLogIndex); err != nil {\n\t\treturn fmt.Errorf(\"log compaction failed: %v\", err)\n\t}\n\n\treturn nil\n}\n\n// GetConfiguration returns the persisted configuration of the Raft cluster\n// without starting a Raft instance or connecting to the cluster. This function\n// has identical behavior to Raft.GetConfiguration.\nfunc GetConfiguration(conf *Config, fsm FSM, logs LogStore, stable StableStore,\n\tsnaps SnapshotStore, trans Transport,\n) (Configuration, error) {\n\tconf.skipStartup = true\n\tr, err := NewRaft(conf, fsm, logs, stable, snaps, trans)\n\tif err != nil {\n\t\treturn Configuration{}, err\n\t}\n\tfuture := r.GetConfiguration()\n\tif err = future.Error(); err != nil {\n\t\treturn Configuration{}, err\n\t}\n\treturn future.Configuration(), nil\n}\n\n// HasExistingState returns true if the server has any existing state (logs,\n// knowledge of a current term, or any snapshots).\nfunc HasExistingState(logs LogStore, stable StableStore, snaps SnapshotStore) (bool, error) {\n\t// Make sure we don't have a current term.\n\tcurrentTerm, err := stable.GetUint64(keyCurrentTerm)\n\tif err == nil {\n\t\tif currentTerm > 0 {\n\t\t\treturn true, nil\n\t\t}\n\t} else {\n\t\tif err.Error() != \"not found\" {\n\t\t\treturn false, fmt.Errorf(\"failed to read current term: %v\", err)\n\t\t}\n\t}\n\n\t// Make sure we have an empty 
log.\n\tlastIndex, err := logs.LastIndex()\n\tif err != nil {\n\t\treturn false, fmt.Errorf(\"failed to get last log index: %v\", err)\n\t}\n\tif lastIndex > 0 {\n\t\treturn true, nil\n\t}\n\n\t// Make sure we have no snapshots\n\tsnapshots, err := snaps.List()\n\tif err != nil {\n\t\treturn false, fmt.Errorf(\"failed to list snapshots: %v\", err)\n\t}\n\tif len(snapshots) > 0 {\n\t\treturn true, nil\n\t}\n\n\treturn false, nil\n}\n\n// NewRaft is used to construct a new Raft node. It takes a configuration, as well\n// as implementations of various interfaces that are required. If we have any\n// old state, such as snapshots, logs, peers, etc, all those will be restored\n// when creating the Raft node.\nfunc NewRaft(conf *Config, fsm FSM, logs LogStore, stable StableStore, snaps SnapshotStore, trans Transport) (*Raft, error) {\n\t// Validate the configuration.\n\tif err := ValidateConfig(conf); err != nil {\n\t\treturn nil, err\n\t}\n\n\t// Ensure we have a LogOutput.\n\tlogger := conf.getOrCreateLogger()\n\n\t// Try to restore the current term.\n\tcurrentTerm, err := stable.GetUint64(keyCurrentTerm)\n\tif err != nil && err.Error() != \"not found\" {\n\t\treturn nil, fmt.Errorf(\"failed to load current term: %v\", err)\n\t}\n\n\t// Read the index of the last log entry.\n\tlastIndex, err := logs.LastIndex()\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"failed to find last log: %v\", err)\n\t}\n\n\t// Get the last log entry.\n\tvar lastLog Log\n\tif lastIndex > 0 {\n\t\tif err = logs.GetLog(lastIndex, &lastLog); err != nil {\n\t\t\treturn nil, fmt.Errorf(\"failed to get last log at index %d: %v\", lastIndex, err)\n\t\t}\n\t}\n\n\t// Make sure we have a valid server address and ID.\n\tprotocolVersion := conf.ProtocolVersion\n\tlocalAddr := trans.LocalAddr()\n\tlocalID := conf.LocalID\n\n\t// TODO (slackpad) - When we deprecate protocol version 2, remove this\n\t// along with the AddPeer() and RemovePeer() APIs.\n\tif protocolVersion < 3 && string(localID) != 
string(localAddr) {\n\t\treturn nil, fmt.Errorf(\"when running with ProtocolVersion < 3, LocalID must be set to the network address\")\n\t}\n\n\t// Buffer applyCh to MaxAppendEntries if the option is enabled\n\tapplyCh := make(chan *logFuture)\n\tif conf.BatchApplyCh {\n\t\tapplyCh = make(chan *logFuture, conf.MaxAppendEntries)\n\t}\n\n\t_, transportSupportPreVote := trans.(WithPreVote)\n\t// Create Raft struct.\n\tr := &Raft{\n\t\tprotocolVersion:       protocolVersion,\n\t\tapplyCh:               applyCh,\n\t\tfsm:                   fsm,\n\t\tfsmMutateCh:           make(chan interface{}, 128),\n\t\tfsmSnapshotCh:         make(chan *reqSnapshotFuture),\n\t\tleaderCh:              make(chan bool, 1),\n\t\tlocalID:               localID,\n\t\tlocalAddr:             localAddr,\n\t\tlogger:                logger,\n\t\tlogs:                  logs,\n\t\tconfigurationChangeCh: make(chan *configurationChangeFuture),\n\t\tconfigurations:        configurations{},\n\t\trpcCh:                 trans.Consumer(),\n\t\tsnapshots:             snaps,\n\t\tuserSnapshotCh:        make(chan *userSnapshotFuture),\n\t\tuserRestoreCh:         make(chan *userRestoreFuture),\n\t\tshutdownCh:            make(chan struct{}),\n\t\tstable:                stable,\n\t\ttrans:                 trans,\n\t\tverifyCh:              make(chan *verifyFuture, 64),\n\t\tconfigurationsCh:      make(chan *configurationsFuture, 8),\n\t\tbootstrapCh:           make(chan *bootstrapFuture),\n\t\tobservers:             make(map[uint64]*Observer),\n\t\tleadershipTransferCh:  make(chan *leadershipTransferFuture, 1),\n\t\tleaderNotifyCh:        make(chan struct{}, 1),\n\t\tfollowerNotifyCh:      make(chan struct{}, 1),\n\t\tmainThreadSaturation:  newSaturationMetric([]string{\"raft\", \"thread\", \"main\", \"saturation\"}, 1*time.Second),\n\t\tpreVoteDisabled:       conf.PreVoteDisabled || !transportSupportPreVote,\n\t\tnoLegacyTelemetry:     conf.NoLegacyTelemetry,\n\t}\n\tif !transportSupportPreVote && 
!conf.PreVoteDisabled {\n\t\tr.logger.Warn(\"pre-vote is disabled because it is not supported by the Transport\")\n\t}\n\n\tr.conf.Store(*conf)\n\n\t// Initialize as a follower.\n\tr.setState(Follower)\n\n\t// Restore the current term and the last log.\n\tr.setCurrentTerm(currentTerm)\n\tr.setLastLog(lastLog.Index, lastLog.Term)\n\n\t// Attempt to restore a snapshot if there are any.\n\tif err := r.restoreSnapshot(); err != nil {\n\t\treturn nil, err\n\t}\n\n\t// Scan through the log for any configuration change entries.\n\tsnapshotIndex, _ := r.getLastSnapshot()\n\tfor index := snapshotIndex + 1; index <= lastLog.Index; index++ {\n\t\tvar entry Log\n\t\tif err := r.logs.GetLog(index, &entry); err != nil {\n\t\t\tr.logger.Error(\"failed to get log\", \"index\", index, \"error\", err)\n\t\t\tpanic(err)\n\t\t}\n\t\tif err := r.processConfigurationLogEntry(&entry); err != nil {\n\t\t\treturn nil, err\n\t\t}\n\t}\n\tr.logger.Info(\"initial configuration\",\n\t\t\"index\", r.configurations.latestIndex,\n\t\t\"servers\", hclog.Fmt(\"%+v\", r.configurations.latest.Servers))\n\n\t// Setup a heartbeat fast-path to avoid head-of-line\n\t// blocking where possible. It MUST be safe for this\n\t// to be called concurrently with a blocking RPC.\n\ttrans.SetHeartbeatHandler(r.processHeartbeat)\n\n\tif conf.skipStartup {\n\t\treturn r, nil\n\t}\n\t// Start the background work.\n\tr.goFunc(r.run)\n\tr.goFunc(r.runFSM)\n\tr.goFunc(r.runSnapshots)\n\treturn r, nil\n}\n\n// restoreSnapshot attempts to restore the latest snapshots, and fails if none\n// of them can be restored. 
This is called at initialization time, and is\n// completely unsafe to call at any other time.\nfunc (r *Raft) restoreSnapshot() error {\n\tsnapshots, err := r.snapshots.List()\n\tif err != nil {\n\t\tr.logger.Error(\"failed to list snapshots\", \"error\", err)\n\t\treturn err\n\t}\n\n\t// Try to load in order of newest to oldest\n\tfor _, snapshot := range snapshots {\n\t\tif success := r.tryRestoreSingleSnapshot(snapshot); !success {\n\t\t\tcontinue\n\t\t}\n\n\t\t// Update the lastApplied so we don't replay old logs\n\t\tr.setLastApplied(snapshot.Index)\n\n\t\t// Update the last stable snapshot info\n\t\tr.setLastSnapshot(snapshot.Index, snapshot.Term)\n\n\t\t// Update the configuration\n\t\tvar conf Configuration\n\t\tvar index uint64\n\t\tif snapshot.Version > 0 {\n\t\t\tconf = snapshot.Configuration\n\t\t\tindex = snapshot.ConfigurationIndex\n\t\t} else {\n\t\t\tvar err error\n\t\t\tif conf, err = decodePeers(snapshot.Peers, r.trans); err != nil {\n\t\t\t\treturn err\n\t\t\t}\n\t\t\tindex = snapshot.Index\n\t\t}\n\t\tr.setCommittedConfiguration(conf, index)\n\t\tr.setLatestConfiguration(conf, index)\n\n\t\t// Success!\n\t\treturn nil\n\t}\n\n\t// If we had snapshots and failed to load them, its an error\n\tif len(snapshots) > 0 {\n\t\treturn fmt.Errorf(\"failed to load any existing snapshots\")\n\t}\n\treturn nil\n}\n\nfunc (r *Raft) tryRestoreSingleSnapshot(snapshot *SnapshotMeta) bool {\n\tif r.config().NoSnapshotRestoreOnStart {\n\t\treturn true\n\t}\n\n\tsnapLogger := r.logger.With(\n\t\t\"id\", snapshot.ID,\n\t\t\"last-index\", snapshot.Index,\n\t\t\"last-term\", snapshot.Term,\n\t\t\"size-in-bytes\", snapshot.Size,\n\t)\n\n\tsnapLogger.Info(\"starting restore from snapshot\")\n\n\t_, source, err := r.snapshots.Open(snapshot.ID)\n\tif err != nil {\n\t\tsnapLogger.Error(\"failed to open snapshot\", \"error\", err)\n\t\treturn false\n\t}\n\n\tif err := fsmRestoreAndMeasure(snapLogger, r.fsm, source, snapshot.Size); err != nil {\n\t\t_ = 
source.Close()\n\t\tsnapLogger.Error(\"failed to restore snapshot\", \"error\", err)\n\t\treturn false\n\t}\n\t_ = source.Close()\n\n\tsnapLogger.Info(\"restored from snapshot\")\n\n\treturn true\n}\n\nfunc (r *Raft) config() Config {\n\treturn r.conf.Load().(Config)\n}\n\n// ReloadConfig updates the configuration of a running raft node. If the new\n// configuration is invalid an error is returned and no changes made to the\n// instance. All fields will be copied from rc into the new configuration, even\n// if they are zero valued.\nfunc (r *Raft) ReloadConfig(rc ReloadableConfig) error {\n\tr.confReloadMu.Lock()\n\tdefer r.confReloadMu.Unlock()\n\n\t// Load the current config (note we are under a lock so it can't be changed\n\t// between this read and a later Store).\n\toldCfg := r.config()\n\n\t// Set the reloadable fields\n\tnewCfg := rc.apply(oldCfg)\n\n\tif err := ValidateConfig(&newCfg); err != nil {\n\t\treturn err\n\t}\n\tr.conf.Store(newCfg)\n\n\tif rc.HeartbeatTimeout < oldCfg.HeartbeatTimeout {\n\t\t// On leader, ensure replication loops running with a longer\n\t\t// timeout than what we want now discover the change.\n\t\tasyncNotifyCh(r.leaderNotifyCh)\n\t\t// On follower, update current timer to use the shorter new value.\n\t\tasyncNotifyCh(r.followerNotifyCh)\n\t}\n\treturn nil\n}\n\n// ReloadableConfig returns the current state of the reloadable fields in Raft's\n// configuration. This is useful for programs to discover the current state for\n// reporting to users or tests. It is safe to call from any goroutine. 
It is\n// intended for reporting and testing purposes primarily; external\n// synchronization would be required to safely use this in a read-modify-write\n// pattern for reloadable configuration options.\nfunc (r *Raft) ReloadableConfig() ReloadableConfig {\n\tcfg := r.config()\n\tvar rc ReloadableConfig\n\trc.fromConfig(cfg)\n\treturn rc\n}\n\n// BootstrapCluster is equivalent to non-member BootstrapCluster but can be\n// called on an un-bootstrapped Raft instance after it has been created. This\n// should only be called at the beginning of time for the cluster with an\n// identical configuration listing all Voter servers. There is no need to\n// bootstrap Nonvoter and Staging servers.\n//\n// A cluster can only be bootstrapped once from a single participating Voter\n// server. Any further attempts to bootstrap will return an error that can be\n// safely ignored.\n//\n// One sane approach is to bootstrap a single server with a configuration\n// listing just itself as a Voter, then invoke AddVoter() on it to add other\n// servers to the cluster.\nfunc (r *Raft) BootstrapCluster(configuration Configuration) Future {\n\tbootstrapReq := &bootstrapFuture{}\n\tbootstrapReq.init()\n\tbootstrapReq.configuration = configuration\n\tselect {\n\tcase <-r.shutdownCh:\n\t\treturn errorFuture{ErrRaftShutdown}\n\tcase r.bootstrapCh <- bootstrapReq:\n\t\treturn bootstrapReq\n\t}\n}\n\n// Leader is used to return the current leader of the cluster.\n// It may return an empty string if there is no current leader\n// or the leader is unknown.\n//\n// Deprecated: use LeaderWithID instead.\nfunc (r *Raft) Leader() ServerAddress {\n\tr.leaderLock.RLock()\n\tleaderAddr := r.leaderAddr\n\tr.leaderLock.RUnlock()\n\treturn leaderAddr\n}\n\n// LeaderWithID is used to return the current leader address and ID of the cluster.\n// It may return empty strings if there is no current leader\n// or the leader is unknown.\nfunc (r *Raft) LeaderWithID() (ServerAddress, 
ServerID) {\n\tr.leaderLock.RLock()\n\tleaderAddr := r.leaderAddr\n\tleaderID := r.leaderID\n\tr.leaderLock.RUnlock()\n\treturn leaderAddr, leaderID\n}\n\n// Apply is used to apply a command to the FSM in a highly consistent\n// manner. This returns a future that can be used to wait on the application.\n// An optional timeout can be provided to limit the amount of time we wait\n// for the command to be started. This must be run on the leader or it\n// will fail.\n//\n// If the node discovers it is no longer the leader while applying the command,\n// it will return ErrLeadershipLost. There is no way to guarantee whether the\n// write succeeded or failed in this case. For example, if the leader is\n// partitioned it can't know if a quorum of followers wrote the log to disk. If\n// at least one did, it may survive into the next leader's term.\n//\n// If a user snapshot is restored while the command is in-flight, an\n// ErrAbortedByRestore is returned. In this case the write effectively failed\n// since its effects will not be present in the FSM after the restore.\nfunc (r *Raft) Apply(cmd []byte, timeout time.Duration) ApplyFuture {\n\treturn r.ApplyLog(Log{Data: cmd}, timeout)\n}\n\n// ApplyLog performs Apply but takes in a Log directly. The only values\n// currently taken from the submitted Log are Data and Extensions. 
See\n// Apply for details on error cases.\nfunc (r *Raft) ApplyLog(log Log, timeout time.Duration) ApplyFuture {\n\tmetrics.IncrCounter([]string{\"raft\", \"apply\"}, 1)\n\n\tvar timer <-chan time.Time\n\tif timeout > 0 {\n\t\ttimer = time.After(timeout)\n\t}\n\n\t// Create a log future, no index or term yet\n\tlogFuture := &logFuture{\n\t\tlog: Log{\n\t\t\tType:       LogCommand,\n\t\t\tData:       log.Data,\n\t\t\tExtensions: log.Extensions,\n\t\t},\n\t}\n\tlogFuture.init()\n\n\tselect {\n\tcase <-timer:\n\t\treturn errorFuture{ErrEnqueueTimeout}\n\tcase <-r.shutdownCh:\n\t\treturn errorFuture{ErrRaftShutdown}\n\tcase r.applyCh <- logFuture:\n\t\treturn logFuture\n\t}\n}\n\n// Barrier is used to issue a command that blocks until all preceding\n// operations have been applied to the FSM. It can be used to ensure the\n// FSM reflects all queued writes. An optional timeout can be provided to\n// limit the amount of time we wait for the command to be started. This\n// must be run on the leader, or it will fail.\nfunc (r *Raft) Barrier(timeout time.Duration) Future {\n\tmetrics.IncrCounter([]string{\"raft\", \"barrier\"}, 1)\n\tvar timer <-chan time.Time\n\tif timeout > 0 {\n\t\ttimer = time.After(timeout)\n\t}\n\n\t// Create a log future, no index or term yet\n\tlogFuture := &logFuture{log: Log{Type: LogBarrier}}\n\tlogFuture.init()\n\n\tselect {\n\tcase <-timer:\n\t\treturn errorFuture{ErrEnqueueTimeout}\n\tcase <-r.shutdownCh:\n\t\treturn errorFuture{ErrRaftShutdown}\n\tcase r.applyCh <- logFuture:\n\t\treturn logFuture\n\t}\n}\n\n// VerifyLeader is used to ensure this peer is still the leader. 
It may be used\n// to prevent returning stale data from the FSM after the peer has lost\n// leadership.\nfunc (r *Raft) VerifyLeader() Future {\n\tmetrics.IncrCounter([]string{\"raft\", \"verify_leader\"}, 1)\n\tverifyFuture := &verifyFuture{}\n\tverifyFuture.init()\n\tselect {\n\tcase <-r.shutdownCh:\n\t\treturn errorFuture{ErrRaftShutdown}\n\tcase r.verifyCh <- verifyFuture:\n\t\treturn verifyFuture\n\t}\n}\n\n// GetConfiguration returns the latest configuration. This may not yet be\n// committed. The main loop can access this directly.\nfunc (r *Raft) GetConfiguration() ConfigurationFuture {\n\tconfigReq := &configurationsFuture{}\n\tconfigReq.init()\n\tconfigReq.configurations = configurations{latest: r.getLatestConfiguration()}\n\tconfigReq.respond(nil)\n\treturn configReq\n}\n\n// AddPeer to the cluster configuration. Must be run on the leader, or it will fail.\n//\n// Deprecated: Use AddVoter/AddNonvoter instead.\nfunc (r *Raft) AddPeer(peer ServerAddress) Future {\n\tif r.protocolVersion > 2 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.requestConfigChange(configurationChangeRequest{\n\t\tcommand:       AddVoter,\n\t\tserverID:      ServerID(peer),\n\t\tserverAddress: peer,\n\t\tprevIndex:     0,\n\t}, 0)\n}\n\n// RemovePeer from the cluster configuration. If the current leader is being\n// removed, it will cause a new election to occur. Must be run on the leader,\n// or it will fail.\n//\n// Deprecated: Use RemoveServer instead.\nfunc (r *Raft) RemovePeer(peer ServerAddress) Future {\n\tif r.protocolVersion > 2 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.requestConfigChange(configurationChangeRequest{\n\t\tcommand:   RemoveServer,\n\t\tserverID:  ServerID(peer),\n\t\tprevIndex: 0,\n\t}, 0)\n}\n\n// AddVoter will add the given server to the cluster as a staging server. If the\n// server is already in the cluster as a voter, this updates the server's address.\n// This must be run on the leader or it will fail. 
The leader will promote the\n// staging server to a voter once that server is ready. If nonzero, prevIndex is\n// the index of the only configuration upon which this change may be applied; if\n// another configuration entry has been added in the meantime, this request will\n// fail. If nonzero, timeout is how long this server should wait before the\n// configuration change log entry is appended.\nfunc (r *Raft) AddVoter(id ServerID, address ServerAddress, prevIndex uint64, timeout time.Duration) IndexFuture {\n\tif r.protocolVersion < 2 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.requestConfigChange(configurationChangeRequest{\n\t\tcommand:       AddVoter,\n\t\tserverID:      id,\n\t\tserverAddress: address,\n\t\tprevIndex:     prevIndex,\n\t}, timeout)\n}\n\n// AddNonvoter will add the given server to the cluster but won't assign it a\n// vote. The server will receive log entries, but it won't participate in\n// elections or log entry commitment. If the server is already in the cluster,\n// this updates the server's address. This must be run on the leader or it will\n// fail. For prevIndex and timeout, see AddVoter.\nfunc (r *Raft) AddNonvoter(id ServerID, address ServerAddress, prevIndex uint64, timeout time.Duration) IndexFuture {\n\tif r.protocolVersion < 3 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.requestConfigChange(configurationChangeRequest{\n\t\tcommand:       AddNonvoter,\n\t\tserverID:      id,\n\t\tserverAddress: address,\n\t\tprevIndex:     prevIndex,\n\t}, timeout)\n}\n\n// RemoveServer will remove the given server from the cluster. If the current\n// leader is being removed, it will cause a new election to occur. This must be\n// run on the leader or it will fail. 
For prevIndex and timeout, see AddVoter.\nfunc (r *Raft) RemoveServer(id ServerID, prevIndex uint64, timeout time.Duration) IndexFuture {\n\tif r.protocolVersion < 2 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.requestConfigChange(configurationChangeRequest{\n\t\tcommand:   RemoveServer,\n\t\tserverID:  id,\n\t\tprevIndex: prevIndex,\n\t}, timeout)\n}\n\n// DemoteVoter will take away a server's vote, if it has one. If present, the\n// server will continue to receive log entries, but it won't participate in\n// elections or log entry commitment. If the server is not in the cluster, this\n// does nothing. This must be run on the leader or it will fail. For prevIndex\n// and timeout, see AddVoter.\nfunc (r *Raft) DemoteVoter(id ServerID, prevIndex uint64, timeout time.Duration) IndexFuture {\n\tif r.protocolVersion < 3 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.requestConfigChange(configurationChangeRequest{\n\t\tcommand:   DemoteVoter,\n\t\tserverID:  id,\n\t\tprevIndex: prevIndex,\n\t}, timeout)\n}\n\n// Shutdown is used to stop the Raft background routines.\n// This is not a graceful operation. Provides a future that\n// can be used to block until all background routines have exited.\nfunc (r *Raft) Shutdown() Future {\n\tr.shutdownLock.Lock()\n\tdefer r.shutdownLock.Unlock()\n\n\tif !r.shutdown {\n\t\tclose(r.shutdownCh)\n\t\tr.shutdown = true\n\t\tr.setState(Shutdown)\n\t\treturn &shutdownFuture{r}\n\t}\n\n\t// avoid closing transport twice\n\treturn &shutdownFuture{nil}\n}\n\n// Snapshot is used to manually force Raft to take a snapshot. 
Returns a future\n// that can be used to block until complete, and that contains a function that\n// can be used to open the snapshot.\nfunc (r *Raft) Snapshot() SnapshotFuture {\n\tfuture := &userSnapshotFuture{}\n\tfuture.init()\n\tselect {\n\tcase r.userSnapshotCh <- future:\n\t\treturn future\n\tcase <-r.shutdownCh:\n\t\tfuture.respond(ErrRaftShutdown)\n\t\treturn future\n\t}\n}\n\n// Restore is used to manually force Raft to consume an external snapshot, such\n// as if restoring from a backup. We will use the current Raft configuration,\n// not the one from the snapshot, so that we can restore into a new cluster. We\n// will also use the max of the index of the snapshot, or the current index,\n// and then add 1 to that, so we force a new state with a hole in the Raft log,\n// so that the snapshot will be sent to followers and used for any new joiners.\n// This can only be run on the leader, and blocks until the restore is complete\n// or an error occurs.\n//\n// WARNING! This operation has the leader take on the state of the snapshot and\n// then sets itself up so that it replicates that to its followers through the\n// install snapshot process. 
This involves a potentially dangerous period where\n// the leader commits ahead of its followers, so should only be used for disaster\n// recovery into a fresh cluster, and should not be used in normal operations.\nfunc (r *Raft) Restore(meta *SnapshotMeta, reader io.Reader, timeout time.Duration) error {\n\tmetrics.IncrCounter([]string{\"raft\", \"restore\"}, 1)\n\tvar timer <-chan time.Time\n\tif timeout > 0 {\n\t\ttimer = time.After(timeout)\n\t}\n\n\t// Perform the restore.\n\trestore := &userRestoreFuture{\n\t\tmeta:   meta,\n\t\treader: reader,\n\t}\n\trestore.init()\n\tselect {\n\tcase <-timer:\n\t\treturn ErrEnqueueTimeout\n\tcase <-r.shutdownCh:\n\t\treturn ErrRaftShutdown\n\tcase r.userRestoreCh <- restore:\n\t\t// If the restore is ingested then wait for it to complete.\n\t\tif err := restore.Error(); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\t// Apply a no-op log entry. Waiting for this allows us to wait until the\n\t// followers have gotten the restore and replicated at least this new\n\t// entry, which shows that we've also faulted and installed the\n\t// snapshot with the contents of the restore.\n\tnoop := &logFuture{\n\t\tlog: Log{\n\t\t\tType: LogNoop,\n\t\t},\n\t}\n\tnoop.init()\n\tselect {\n\tcase <-timer:\n\t\treturn ErrEnqueueTimeout\n\tcase <-r.shutdownCh:\n\t\treturn ErrRaftShutdown\n\tcase r.applyCh <- noop:\n\t\treturn noop.Error()\n\t}\n}\n\n// State returns the state of this raft peer.\nfunc (r *Raft) State() RaftState {\n\treturn r.getState()\n}\n\n// LeaderCh is used to get a channel which delivers signals on acquiring or\n// losing leadership. It sends true if we become the leader, and false if we\n// lose it.\n//\n// Receivers can expect to receive a notification only if leadership\n// transition has occurred.\n//\n// If receivers aren't ready for the signal, signals may be dropped, leaving only the\n// latest leadership transition. 
For example, if a receiver receives subsequent\n// `true` values, they may deduce that leadership was lost and regained while\n// the receiver was processing the first leadership transition.\nfunc (r *Raft) LeaderCh() <-chan bool {\n\treturn r.leaderCh\n}\n\n// String returns a string representation of this Raft node.\nfunc (r *Raft) String() string {\n\treturn fmt.Sprintf(\"Node at %s [%v]\", r.localAddr, r.getState())\n}\n\n// LastContact returns the time of last contact by a leader.\n// This only makes sense if we are currently a follower.\nfunc (r *Raft) LastContact() time.Time {\n\tr.lastContactLock.RLock()\n\tlast := r.lastContact\n\tr.lastContactLock.RUnlock()\n\treturn last\n}\n\n// Stats is used to return a map of various internal stats. This\n// should only be used for informative purposes or debugging.\n//\n// Keys are: \"state\", \"term\", \"last_log_index\", \"last_log_term\",\n// \"commit_index\", \"applied_index\", \"fsm_pending\",\n// \"last_snapshot_index\", \"last_snapshot_term\",\n// \"latest_configuration\", \"last_contact\", and \"num_peers\".\n//\n// The value of \"state\" is the name of the leadership state\n// the node is in at any given time.\n// The possible states are: \"Follower\", \"Candidate\", \"Leader\", \"Shutdown\".\n//\n// The value of \"latest_configuration\" is a string which contains\n// the id of each server, its suffrage status, and its address.\n//\n// The value of \"last_contact\" is either \"never\" if there\n// has been no contact with a leader, \"0\" if the node is in the\n// leader state, or the time since last contact with a leader\n// formatted as a string.\n//\n// The value of \"num_peers\" is the number of other voting servers in the\n// cluster, not including this node. 
If this node isn't part of the\n// configuration then this will be \"0\".\n//\n// All other values are uint64s, formatted as strings.\nfunc (r *Raft) Stats() map[string]string {\n\ttoString := func(v uint64) string {\n\t\treturn strconv.FormatUint(v, 10)\n\t}\n\tlastLogIndex, lastLogTerm := r.getLastLog()\n\tlastSnapIndex, lastSnapTerm := r.getLastSnapshot()\n\ts := map[string]string{\n\t\t\"state\":                r.getState().String(),\n\t\t\"term\":                 toString(r.getCurrentTerm()),\n\t\t\"last_log_index\":       toString(lastLogIndex),\n\t\t\"last_log_term\":        toString(lastLogTerm),\n\t\t\"commit_index\":         toString(r.getCommitIndex()),\n\t\t\"applied_index\":        toString(r.getLastApplied()),\n\t\t\"fsm_pending\":          toString(uint64(len(r.fsmMutateCh))),\n\t\t\"last_snapshot_index\":  toString(lastSnapIndex),\n\t\t\"last_snapshot_term\":   toString(lastSnapTerm),\n\t\t\"protocol_version\":     toString(uint64(r.protocolVersion)),\n\t\t\"protocol_version_min\": toString(uint64(ProtocolVersionMin)),\n\t\t\"protocol_version_max\": toString(uint64(ProtocolVersionMax)),\n\t\t\"snapshot_version_min\": toString(uint64(SnapshotVersionMin)),\n\t\t\"snapshot_version_max\": toString(uint64(SnapshotVersionMax)),\n\t}\n\n\tfuture := r.GetConfiguration()\n\tif err := future.Error(); err != nil {\n\t\tr.logger.Warn(\"could not get configuration for stats\", \"error\", err)\n\t} else {\n\t\tconfiguration := future.Configuration()\n\t\ts[\"latest_configuration_index\"] = toString(future.Index())\n\t\ts[\"latest_configuration\"] = fmt.Sprintf(\"%+v\", configuration.Servers)\n\n\t\t// This is a legacy metric that we've seen people use in the wild.\n\t\thasUs := false\n\t\tnumPeers := 0\n\t\tfor _, server := range configuration.Servers {\n\t\t\tif server.Suffrage == Voter {\n\t\t\t\tif server.ID == r.localID {\n\t\t\t\t\thasUs = true\n\t\t\t\t} else {\n\t\t\t\t\tnumPeers++\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\tif !hasUs {\n\t\t\tnumPeers = 
0\n\t\t}\n\t\ts[\"num_peers\"] = toString(uint64(numPeers))\n\t}\n\n\tlast := r.LastContact()\n\tif r.getState() == Leader {\n\t\ts[\"last_contact\"] = \"0\"\n\t} else if last.IsZero() {\n\t\ts[\"last_contact\"] = \"never\"\n\t} else {\n\t\ts[\"last_contact\"] = fmt.Sprintf(\"%v\", time.Since(last))\n\t}\n\treturn s\n}\n\n// CurrentTerm returns the current term.\nfunc (r *Raft) CurrentTerm() uint64 {\n\treturn r.getCurrentTerm()\n}\n\n// LastIndex returns the last index in stable storage,\n// either from the last log or from the last snapshot.\nfunc (r *Raft) LastIndex() uint64 {\n\treturn r.getLastIndex()\n}\n\n// CommitIndex returns the committed index.\n// This API may be helpful for a server to implement the read index optimization\n// as described in the Raft paper.\nfunc (r *Raft) CommitIndex() uint64 {\n\treturn r.getCommitIndex()\n}\n\n// AppliedIndex returns the last index applied to the FSM. This is generally\n// lagging behind the last index, especially for indexes that are persisted but\n// have not yet been considered committed by the leader. NOTE - this reflects\n// the last index that was sent to the application's FSM over the apply channel\n// but DOES NOT mean that the application's FSM has yet consumed it and applied\n// it to its internal state. Thus, the application's state may lag behind this\n// index.\nfunc (r *Raft) AppliedIndex() uint64 {\n\treturn r.getLastApplied()\n}\n\n// LeadershipTransfer will transfer leadership to a server in the cluster.\n// This can only be called from the leader, or it will fail. The leader will\n// stop accepting client requests, make sure the target server is up to date\n// and starts the transfer with a TimeoutNow message. This message has the same\n// effect as if the election timeout on the target server fires. Since\n// it is unlikely that another server is starting an election, it is very\n// likely that the target server is able to win the election.  
Note that raft\n// protocol version 3 is not sufficient to use LeadershipTransfer. A recent\n// version of that library has to be used that includes this feature.  Using\n// transfer leadership is safe however in a cluster where not every node has\n// the latest version. If a follower cannot be promoted, it will fail\n// gracefully.\nfunc (r *Raft) LeadershipTransfer() Future {\n\tif r.protocolVersion < 3 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.initiateLeadershipTransfer(nil, nil)\n}\n\n// LeadershipTransferToServer does the same as LeadershipTransfer but takes a\n// server in the arguments in case a leadership should be transitioned to a\n// specific server in the cluster.  Note that raft protocol version 3 is not\n// sufficient to use LeadershipTransfer. A recent version of that library has\n// to be used that includes this feature. Using transfer leadership is safe\n// however in a cluster where not every node has the latest version. If a\n// follower cannot be promoted, it will fail gracefully.\nfunc (r *Raft) LeadershipTransferToServer(id ServerID, address ServerAddress) Future {\n\tif r.protocolVersion < 3 {\n\t\treturn errorFuture{ErrUnsupportedProtocol}\n\t}\n\n\treturn r.initiateLeadershipTransfer(&id, &address)\n}\n"
  },
  {
    "path": "bench/bench.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raftbench\n\n// raftbench provides common benchmarking functions which can be used by\n// anything which implements the raft.LogStore and raft.StableStore interfaces.\n// All functions accept these interfaces and perform benchmarking. This\n// makes comparing backend performance easier by sharing the tests.\n\nimport (\n\t\"testing\"\n\n\t\"github.com/hashicorp/raft\"\n)\n\nfunc FirstIndex(b *testing.B, store raft.LogStore) {\n\t// Create some fake data\n\tvar logs []*raft.Log\n\tfor i := 1; i < 10; i++ {\n\t\tlogs = append(logs, &raft.Log{Index: uint64(i), Data: []byte(\"data\")})\n\t}\n\tif err := store.StoreLogs(logs); err != nil {\n\t\tb.Fatalf(\"err: %s\", err)\n\t}\n\tb.ResetTimer()\n\n\t// Run FirstIndex a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\t_, _ = store.FirstIndex()\n\t}\n}\n\nfunc LastIndex(b *testing.B, store raft.LogStore) {\n\t// Create some fake data\n\tvar logs []*raft.Log\n\tfor i := 1; i < 10; i++ {\n\t\tlogs = append(logs, &raft.Log{Index: uint64(i), Data: []byte(\"data\")})\n\t}\n\tif err := store.StoreLogs(logs); err != nil {\n\t\tb.Fatalf(\"err: %s\", err)\n\t}\n\tb.ResetTimer()\n\n\t// Run LastIndex a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\t_, _ = store.LastIndex()\n\t}\n}\n\nfunc GetLog(b *testing.B, store raft.LogStore) {\n\t// Create some fake data\n\tvar logs []*raft.Log\n\tfor i := 1; i < 10; i++ {\n\t\tlogs = append(logs, &raft.Log{Index: uint64(i), Data: []byte(\"data\")})\n\t}\n\tif err := store.StoreLogs(logs); err != nil {\n\t\tb.Fatalf(\"err: %s\", err)\n\t}\n\tb.ResetTimer()\n\n\t// Run GetLog a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\tif err := store.GetLog(5, new(raft.Log)); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n\nfunc StoreLog(b *testing.B, store raft.LogStore) {\n\t// Run StoreLog a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\tlog := &raft.Log{Index: uint64(n), Data: 
[]byte(\"data\")}\n\t\tif err := store.StoreLog(log); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n\nfunc StoreLogs(b *testing.B, store raft.LogStore) {\n\t// Run StoreLogs a number of times. We want to set multiple logs each\n\t// run, so we create 3 logs with incrementing indexes for each iteration.\n\tfor n := 0; n < b.N; n++ {\n\t\tb.StopTimer()\n\t\toffset := 3 * (n + 1)\n\t\tlogs := []*raft.Log{\n\t\t\t{Index: uint64(offset - 2), Data: []byte(\"data\")},\n\t\t\t{Index: uint64(offset - 1), Data: []byte(\"data\")},\n\t\t\t{Index: uint64(offset), Data: []byte(\"data\")},\n\t\t}\n\t\tb.StartTimer()\n\n\t\tif err := store.StoreLogs(logs); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n\nfunc DeleteRange(b *testing.B, store raft.LogStore) {\n\t// Create some fake data. In this case, we create 3 new log entries for each\n\t// test case, and separate them by index in multiples of 10. This allows\n\t// some room so that we can test deleting ranges with \"extra\" logs\n\t// to ensure we stop going to the database once our max index is hit.\n\tvar logs []*raft.Log\n\tfor n := 0; n < b.N; n++ {\n\t\toffset := 10 * n\n\t\tfor i := offset; i < offset+3; i++ {\n\t\t\tlogs = append(logs, &raft.Log{Index: uint64(i), Data: []byte(\"data\")})\n\t\t}\n\t}\n\tif err := store.StoreLogs(logs); err != nil {\n\t\tb.Fatalf(\"err: %s\", err)\n\t}\n\tb.ResetTimer()\n\n\t// Delete a range of the data\n\tfor n := 0; n < b.N; n++ {\n\t\toffset := 10 * n\n\t\tif err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n\nfunc Set(b *testing.B, store raft.StableStore) {\n\t// Run Set a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\tif err := store.Set([]byte{byte(n)}, []byte(\"val\")); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n\nfunc Get(b *testing.B, store raft.StableStore) {\n\t// Create some fake data\n\tfor i := 1; i < 10; i++ {\n\t\tif err := 
store.Set([]byte{byte(i)}, []byte(\"val\")); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n\tb.ResetTimer()\n\n\t// Run Get a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\tif _, err := store.Get([]byte{0x05}); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n\nfunc SetUint64(b *testing.B, store raft.StableStore) {\n\t// Run SetUint64 a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\tif err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n\nfunc GetUint64(b *testing.B, store raft.StableStore) {\n\t// Create some fake data\n\tfor i := 0; i < 10; i++ {\n\t\tif err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n\tb.ResetTimer()\n\n\t// Run GetUint64 a number of times\n\tfor n := 0; n < b.N; n++ {\n\t\tif _, err := store.GetUint64([]byte{0x05}); err != nil {\n\t\t\tb.Fatalf(\"err: %s\", err)\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "bench_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n)\n\nfunc BenchmarkStoreLogInMem(b *testing.B) {\n\tconf := DefaultConfig()\n\tconf.LocalID = \"first\"\n\tconf.HeartbeatTimeout = 50 * time.Millisecond\n\tconf.ElectionTimeout = 50 * time.Millisecond\n\tconf.LeaderLeaseTimeout = 50 * time.Millisecond\n\tconf.CommitTimeout = 5 * time.Millisecond\n\tconf.SnapshotThreshold = 100\n\tconf.TrailingLogs = 10\n\tconf.LogLevel = \"OFF\"\n\traft := MakeRaft(b, conf, true)\n\traft.logger.SetLevel(hclog.Off)\n\n\tNoErr(WaitFor(raft, Leader), b)\n\n\tapplyAndWait := func(leader *RaftEnv, n, sz int) {\n\t\t// Do some commits\n\t\tvar futures []ApplyFuture\n\t\tfor i := 0; i < n; i++ {\n\t\t\tfutures = append(futures, leader.raft.Apply(logBytes(i, sz), 0))\n\t\t}\n\t\tfor _, f := range futures {\n\t\t\tNoErr(WaitFuture(f), b)\n\t\t\tleader.logger.Debug(\"applied\", \"index\", f.Index(), \"size\", sz)\n\t\t}\n\t}\n\n\tfor i := 0; i < b.N; i++ {\n\t\t// Do some commits\n\t\tapplyAndWait(raft, 100, 10)\n\t\t// Do a snapshot\n\t\tNoErr(WaitFuture(raft.raft.Snapshot()), b)\n\t}\n}\n"
  },
  {
    "path": "commands.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\n// RPCHeader is a common sub-structure used to pass along protocol version and\n// other information about the cluster. For older Raft implementations before\n// versioning was added this will default to a zero-valued structure when read\n// by newer Raft versions.\ntype RPCHeader struct {\n\t// ProtocolVersion is the version of the protocol the sender is\n\t// speaking.\n\tProtocolVersion ProtocolVersion\n\t// ID is the ServerID of the node sending the RPC Request or Response\n\tID []byte\n\t// Addr is the ServerAddr of the node sending the RPC Request or Response\n\tAddr []byte\n}\n\n// WithRPCHeader is an interface that exposes the RPC header.\ntype WithRPCHeader interface {\n\tGetRPCHeader() RPCHeader\n}\n\n// AppendEntriesRequest is the command used to append entries to the\n// replicated log.\ntype AppendEntriesRequest struct {\n\tRPCHeader\n\n\t// Provide the current term and leader\n\tTerm uint64\n\n\t// Deprecated: use RPCHeader.Addr instead\n\tLeader []byte\n\n\t// Provide the previous entries for integrity checking\n\tPrevLogEntry uint64\n\tPrevLogTerm  uint64\n\n\t// New entries to commit\n\tEntries []*Log\n\n\t// Commit index on the leader\n\tLeaderCommitIndex uint64\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *AppendEntriesRequest) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// AppendEntriesResponse is the response returned from an\n// AppendEntriesRequest.\ntype AppendEntriesResponse struct {\n\tRPCHeader\n\n\t// Newer term if leader is out of date\n\tTerm uint64\n\n\t// Last Log is a hint to help accelerate rebuilding slow nodes\n\tLastLog uint64\n\n\t// We may not succeed if we have a conflicting entry\n\tSuccess bool\n\n\t// There are scenarios where this request didn't succeed\n\t// but there's no need to wait/back-off the next attempt.\n\tNoRetryBackoff bool\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r 
*AppendEntriesResponse) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// RequestVoteRequest is the command used by a candidate to ask a Raft peer\n// for a vote in an election.\ntype RequestVoteRequest struct {\n\tRPCHeader\n\n\t// Provide the term and our id\n\tTerm uint64\n\n\t// Deprecated: use RPCHeader.Addr instead\n\tCandidate []byte\n\n\t// Used to ensure safety\n\tLastLogIndex uint64\n\tLastLogTerm  uint64\n\n\t// Used to indicate to peers if this vote was triggered by a leadership\n\t// transfer. It is required for leadership transfer to work, because servers\n\t// wouldn't vote otherwise if they are aware of an existing leader.\n\tLeadershipTransfer bool\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *RequestVoteRequest) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// RequestVoteResponse is the response returned from a RequestVoteRequest.\ntype RequestVoteResponse struct {\n\tRPCHeader\n\n\t// Newer term if leader is out of date.\n\tTerm uint64\n\n\t// Peers is deprecated, but required by servers that only understand\n\t// protocol version 0. 
This is not populated in protocol version 2\n\t// and later.\n\tPeers []byte\n\n\t// Is the vote granted.\n\tGranted bool\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *RequestVoteResponse) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// RequestPreVoteRequest is the command used by a candidate to ask a Raft peer\n// for a pre-vote.\ntype RequestPreVoteRequest struct {\n\tRPCHeader\n\n\t// Provide the term and our id\n\tTerm uint64\n\n\t// Used to ensure safety\n\tLastLogIndex uint64\n\tLastLogTerm  uint64\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *RequestPreVoteRequest) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// RequestPreVoteResponse is the response returned from a RequestPreVoteRequest.\ntype RequestPreVoteResponse struct {\n\tRPCHeader\n\n\t// Newer term if leader is out of date.\n\tTerm uint64\n\n\t// Is the vote granted.\n\tGranted bool\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *RequestPreVoteResponse) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its\n// log (and state machine) from a snapshot on another peer.\ntype InstallSnapshotRequest struct {\n\tRPCHeader\n\tSnapshotVersion SnapshotVersion\n\n\tTerm   uint64\n\tLeader []byte\n\n\t// These are the last index/term included in the snapshot\n\tLastLogIndex uint64\n\tLastLogTerm  uint64\n\n\t// Peer Set in the snapshot. This is superseded by Configuration\n\t// but remains here in case we receive an InstallSnapshot from a leader\n\t// that's running old code.\n\t// Deprecated: This is deprecated in favor of Configuration\n\tPeers []byte\n\n\t// Cluster membership.\n\tConfiguration []byte\n\t// Log index where 'Configuration' entry was originally written.\n\tConfigurationIndex uint64\n\n\t// Size of the snapshot\n\tSize int64\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *InstallSnapshotRequest) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// InstallSnapshotResponse is the response 
returned from an\n// InstallSnapshotRequest.\ntype InstallSnapshotResponse struct {\n\tRPCHeader\n\n\tTerm    uint64\n\tSuccess bool\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *InstallSnapshotResponse) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// TimeoutNowRequest is the command used by a leader to signal another server to\n// start an election.\ntype TimeoutNowRequest struct {\n\tRPCHeader\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *TimeoutNowRequest) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n\n// TimeoutNowResponse is the response to TimeoutNowRequest.\ntype TimeoutNowResponse struct {\n\tRPCHeader\n}\n\n// GetRPCHeader - See WithRPCHeader.\nfunc (r *TimeoutNowResponse) GetRPCHeader() RPCHeader {\n\treturn r.RPCHeader\n}\n"
  },
  {
    "path": "commitment.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"sort\"\n\t\"sync\"\n)\n\n// Commitment is used to advance the leader's commit index. The leader and\n// replication goroutines report in newly written entries with match(), and\n// this notifies on commitCh when the commit index has advanced.\ntype commitment struct {\n\t// protects matchIndexes and commitIndex\n\tsync.Mutex\n\t// notified when commitIndex increases\n\tcommitCh chan struct{}\n\t// voter ID to log index: the server stores up through this log entry\n\tmatchIndexes map[ServerID]uint64\n\t// a quorum stores up through this log entry. monotonically increases.\n\tcommitIndex uint64\n\t// the first index of this leader's term: this needs to be replicated to a\n\t// majority of the cluster before this leader may mark anything committed\n\t// (per Raft's commitment rule)\n\tstartIndex uint64\n}\n\n// newCommitment returns a commitment struct that notifies the provided\n// channel when log entries have been committed. A new commitment struct is\n// created each time this server becomes leader for a particular term.\n// 'configuration' is the servers in the cluster.\n// 'startIndex' is the first index created in this term (see\n// its description above).\nfunc newCommitment(commitCh chan struct{}, configuration Configuration, startIndex uint64) *commitment {\n\tmatchIndexes := make(map[ServerID]uint64)\n\tfor _, server := range configuration.Servers {\n\t\tif server.Suffrage == Voter {\n\t\t\tmatchIndexes[server.ID] = 0\n\t\t}\n\t}\n\treturn &commitment{\n\t\tcommitCh:     commitCh,\n\t\tmatchIndexes: matchIndexes,\n\t\tcommitIndex:  0,\n\t\tstartIndex:   startIndex,\n\t}\n}\n\n// Called when a new cluster membership configuration is created: it will be\n// used to determine commitment from now on. 
'configuration' is the servers in\n// the cluster.\nfunc (c *commitment) setConfiguration(configuration Configuration) {\n\tc.Lock()\n\tdefer c.Unlock()\n\toldMatchIndexes := c.matchIndexes\n\tc.matchIndexes = make(map[ServerID]uint64)\n\tfor _, server := range configuration.Servers {\n\t\tif server.Suffrage == Voter {\n\t\t\tc.matchIndexes[server.ID] = oldMatchIndexes[server.ID] // defaults to 0\n\t\t}\n\t}\n\tc.recalculate()\n}\n\n// Called by leader after commitCh is notified\nfunc (c *commitment) getCommitIndex() uint64 {\n\tc.Lock()\n\tdefer c.Unlock()\n\treturn c.commitIndex\n}\n\n// Match is called once a server completes writing entries to disk: either the\n// leader has written the new entry or a follower has replied to an\n// AppendEntries RPC. The given server's disk agrees with this server's log up\n// through the given index.\nfunc (c *commitment) match(server ServerID, matchIndex uint64) {\n\tc.Lock()\n\tdefer c.Unlock()\n\tif prev, hasVote := c.matchIndexes[server]; hasVote && matchIndex > prev {\n\t\tc.matchIndexes[server] = matchIndex\n\t\tc.recalculate()\n\t}\n}\n\n// Internal helper to calculate new commitIndex from matchIndexes.\n// Must be called with lock held.\nfunc (c *commitment) recalculate() {\n\tif len(c.matchIndexes) == 0 {\n\t\treturn\n\t}\n\n\tmatched := make([]uint64, 0, len(c.matchIndexes))\n\tfor _, idx := range c.matchIndexes {\n\t\tmatched = append(matched, idx)\n\t}\n\tsort.Sort(uint64Slice(matched))\n\tquorumMatchIndex := matched[(len(matched)-1)/2]\n\n\tif quorumMatchIndex > c.commitIndex && quorumMatchIndex >= c.startIndex {\n\t\tc.commitIndex = quorumMatchIndex\n\t\tasyncNotifyCh(c.commitCh)\n\t}\n}\n"
  },
  {
    "path": "commitment_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"testing\"\n)\n\nfunc makeConfiguration(voters []string) Configuration {\n\tvar configuration Configuration\n\tfor _, voter := range voters {\n\t\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\t\tSuffrage: Voter,\n\t\t\tAddress:  ServerAddress(voter + \"addr\"),\n\t\t\tID:       ServerID(voter),\n\t\t})\n\t}\n\treturn configuration\n}\n\n// Returns a slice of server names of size n.\nfunc voters(n int) Configuration {\n\tif n > 7 {\n\t\tpanic(\"only up to 7 servers implemented\")\n\t}\n\treturn makeConfiguration([]string{\"s1\", \"s2\", \"s3\", \"s4\", \"s5\", \"s6\", \"s7\"}[:n])\n}\n\n// Tests setVoters() keeps matchIndexes where possible.\nfunc TestCommitment_setVoters(t *testing.T) {\n\tcommitCh := make(chan struct{}, 1)\n\tc := newCommitment(commitCh, makeConfiguration([]string{\"a\", \"b\", \"c\"}), 0)\n\tc.match(\"a\", 10)\n\tc.match(\"b\", 20)\n\tc.match(\"c\", 30)\n\t// commitIndex: 20\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n\tc.setConfiguration(makeConfiguration([]string{\"c\", \"d\", \"e\"}))\n\t// c: 30, d: 0, e: 0\n\tc.match(\"e\", 40)\n\tif c.getCommitIndex() != 30 {\n\t\tt.Fatalf(\"expected 30 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n}\n\n// Tests match() being called with smaller index than before.\nfunc TestCommitment_match_max(t *testing.T) {\n\tcommitCh := make(chan struct{}, 1)\n\tc := newCommitment(commitCh, voters(5), 4)\n\n\tc.match(\"s1\", 8)\n\tc.match(\"s2\", 8)\n\tc.match(\"s2\", 1)\n\tc.match(\"s3\", 8)\n\n\tif c.getCommitIndex() != 8 {\n\t\tt.Fatalf(\"calling match with an earlier index should be ignored\")\n\t}\n}\n\n// Tests match() being called with non-voters.\nfunc TestCommitment_match_nonVoting(t *testing.T) {\n\tcommitCh := make(chan struct{}, 1)\n\tc 
:= newCommitment(commitCh, voters(5), 4)\n\n\tc.match(\"s1\", 8)\n\tc.match(\"s2\", 8)\n\tc.match(\"s3\", 8)\n\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n\n\tc.match(\"s90\", 10)\n\tc.match(\"s91\", 10)\n\tc.match(\"s92\", 10)\n\n\tif c.getCommitIndex() != 8 {\n\t\tt.Fatalf(\"non-voting servers shouldn't be able to commit\")\n\t}\n\tif drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"unexpected commit notify\")\n\t}\n}\n\n// Tests recalculate() algorithm.\nfunc TestCommitment_recalculate(t *testing.T) {\n\tcommitCh := make(chan struct{}, 1)\n\tc := newCommitment(commitCh, voters(5), 0)\n\n\tc.match(\"s1\", 30)\n\tc.match(\"s2\", 20)\n\n\tif c.getCommitIndex() != 0 {\n\t\tt.Fatalf(\"shouldn't commit after two of five servers\")\n\t}\n\tif drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"unexpected commit notify\")\n\t}\n\n\tc.match(\"s3\", 10)\n\tif c.getCommitIndex() != 10 {\n\t\tt.Fatalf(\"expected 10 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n\tc.match(\"s4\", 15)\n\tif c.getCommitIndex() != 15 {\n\t\tt.Fatalf(\"expected 15 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n\n\tc.setConfiguration(voters(3))\n\t// s1: 30, s2: 20, s3: 10\n\tif c.getCommitIndex() != 20 {\n\t\tt.Fatalf(\"expected 20 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n\n\tc.setConfiguration(voters(4))\n\t// s1: 30, s2: 20, s3: 10, s4: 0\n\tc.match(\"s2\", 25)\n\tif c.getCommitIndex() != 20 {\n\t\tt.Fatalf(\"expected 20 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"unexpected commit notify\")\n\t}\n\tc.match(\"s4\", 23)\n\tif c.getCommitIndex() != 23 {\n\t\tt.Fatalf(\"expected 23 entries committed, found 
%d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n}\n\n// Tests recalculate() respecting startIndex.\nfunc TestCommitment_recalculate_startIndex(t *testing.T) {\n\tcommitCh := make(chan struct{}, 1)\n\tc := newCommitment(commitCh, voters(5), 4)\n\n\tc.match(\"s1\", 3)\n\tc.match(\"s2\", 3)\n\tc.match(\"s3\", 3)\n\n\tif c.getCommitIndex() != 0 {\n\t\tt.Fatalf(\"can't commit until startIndex is replicated to a quorum\")\n\t}\n\tif drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"unexpected commit notify\")\n\t}\n\n\tc.match(\"s1\", 4)\n\tc.match(\"s2\", 4)\n\tc.match(\"s3\", 4)\n\n\tif c.getCommitIndex() != 4 {\n\t\tt.Fatalf(\"should be able to commit startIndex once replicated to a quorum\")\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n}\n\n// With no voting members in the cluster, the most sane behavior is probably\n// to not mark anything committed.\nfunc TestCommitment_noVoterSanity(t *testing.T) {\n\tcommitCh := make(chan struct{}, 1)\n\tc := newCommitment(commitCh, makeConfiguration([]string{}), 4)\n\tc.match(\"s1\", 10)\n\tc.setConfiguration(makeConfiguration([]string{}))\n\tc.match(\"s1\", 10)\n\tif c.getCommitIndex() != 0 {\n\t\tt.Fatalf(\"no voting servers: shouldn't be able to commit\")\n\t}\n\tif drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"unexpected commit notify\")\n\t}\n\n\t// add a voter so we can commit something and then remove it\n\tc.setConfiguration(voters(1))\n\tc.match(\"s1\", 10)\n\tif c.getCommitIndex() != 10 {\n\t\tt.Fatalf(\"expected 10 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n\n\tc.setConfiguration(makeConfiguration([]string{}))\n\tc.match(\"s1\", 20)\n\tif c.getCommitIndex() != 10 {\n\t\tt.Fatalf(\"expected 10 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"unexpected commit 
notify\")\n\t}\n}\n\n// Single voter commits immediately.\nfunc TestCommitment_singleVoter(t *testing.T) {\n\tcommitCh := make(chan struct{}, 1)\n\tc := newCommitment(commitCh, voters(1), 4)\n\tc.match(\"s1\", 10)\n\tif c.getCommitIndex() != 10 {\n\t\tt.Fatalf(\"expected 10 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n\tc.setConfiguration(voters(1))\n\tif drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"unexpected commit notify\")\n\t}\n\tc.match(\"s1\", 12)\n\tif c.getCommitIndex() != 12 {\n\t\tt.Fatalf(\"expected 12 entries committed, found %d\",\n\t\t\tc.getCommitIndex())\n\t}\n\tif !drainNotifyCh(commitCh) {\n\t\tt.Fatalf(\"expected commit notify\")\n\t}\n}\n"
  },
  {
    "path": "config.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n)\n\n// ProtocolVersion is the version of the protocol (which includes RPC messages\n// as well as Raft-specific log entries) that this server can _understand_. Use\n// the ProtocolVersion member of the Config object to control the version of\n// the protocol to use when _speaking_ to other servers. Note that depending on\n// the protocol version being spoken, some otherwise understood RPC messages\n// may be refused. See dispositionRPC for details of this logic.\n//\n// There are notes about the upgrade path in the description of the versions\n// below. If you are starting a fresh cluster then there's no reason not to\n// jump right to the latest protocol version. If you need to interoperate with\n// older, version 0 Raft servers you'll need to drive the cluster through the\n// different versions in order.\n//\n// The version details are complicated, but here's a summary of what's required\n// to get from a version 0 cluster to version 3:\n//\n//  1. In version N of your app that starts using the new Raft library with\n//     versioning, set ProtocolVersion to 1.\n//  2. Make version N+1 of your app require version N as a prerequisite (all\n//     servers must be upgraded). For version N+1 of your app set ProtocolVersion\n//     to 2.\n//  3. Similarly, make version N+2 of your app require version N+1 as a\n//     prerequisite. For version N+2 of your app, set ProtocolVersion to 3.\n//\n// During this upgrade, older cluster members will still have Server IDs equal\n// to their network addresses. To upgrade an older member and give it an ID, it\n// needs to leave the cluster and re-enter:\n//\n//  1. Remove the server from the cluster with RemoveServer, using its network\n//     address as its ServerID.\n//  2. 
Update the server's config to use a UUID or something else that is\n//     not tied to the machine as the ServerID (restarting the server).\n//  3. Add the server back to the cluster with AddVoter, using its new ID.\n//\n// You can do this during the rolling upgrade from N+1 to N+2 of your app, or\n// as a rolling change at any time after the upgrade.\n//\n// # Version History\n//\n// 0: Original Raft library before versioning was added. Servers running this\n//\n//\tversion of the Raft library use AddPeerDeprecated/RemovePeerDeprecated\n//\tfor all configuration changes, and have no support for LogConfiguration.\n//\n// 1: First versioned protocol, used to interoperate with old servers, and begin\n//\n//\tthe migration path to newer versions of the protocol. Under this version\n//\tall configuration changes are propagated using the now-deprecated\n//\tRemovePeerDeprecated Raft log entry. This means that server IDs are always\n//\tset to be the same as the server addresses (since the old log entry type\n//\tcannot transmit an ID), and only AddPeer/RemovePeer APIs are supported.\n//\tServers running this version of the protocol can understand the new\n//\tLogConfiguration Raft log entry but will never generate one so they can\n//\tremain compatible with version 0 Raft servers in the cluster.\n//\n// 2: Transitional protocol used when migrating an existing cluster to the new\n//\n//\tserver ID system. Server IDs are still set to be the same as server\n//\taddresses, but all configuration changes are propagated using the new\n//\tLogConfiguration Raft log entry type, which can carry full ID information.\n//\tThis version supports the old AddPeer/RemovePeer APIs as well as the new\n//\tID-based AddVoter/RemoveServer APIs which should be used when adding\n//\tversion 3 servers to the cluster later. 
This version sheds all\n//\tinteroperability with version 0 servers, but can interoperate with newer\n//\tRaft servers running with protocol version 1 since they can understand the\n//\tnew LogConfiguration Raft log entry, and this version can still understand\n//\ttheir RemovePeerDeprecated Raft log entries. We need this protocol version\n//\tas an intermediate step between 1 and 3 so that servers will propagate the\n//\tID information that will come from newly-added (or -rolled) servers using\n//\tprotocol version 3, but since they are still using their address-based IDs\n//\tfrom the previous step they will still be able to track commitments and\n//\ttheir own voting status properly. If we skipped this step, servers would\n//\tbe started with their new IDs, but they wouldn't see themselves in the old\n//\taddress-based configuration, so none of the servers would think they had a\n//\tvote.\n//\n// 3: Protocol adding full support for server IDs and new ID-based server APIs\n//\n//\t(AddVoter, AddNonvoter, etc.), old AddPeer/RemovePeer APIs are no longer\n//\tsupported. Version 2 servers should be swapped out by removing them from\n//\tthe cluster one-by-one and re-adding them with updated configuration for\n//\tthis protocol version, along with their server ID. The remove/add cycle\n//\tis required to populate their server ID. Note that removing must be done\n//\tby ID, which will be the old server's address.\ntype ProtocolVersion int\n\nconst (\n\t// ProtocolVersionMin is the minimum protocol version\n\tProtocolVersionMin ProtocolVersion = 0\n\t// ProtocolVersionMax is the maximum protocol version\n\tProtocolVersionMax = 3\n)\n\n// SnapshotVersion is the version of snapshots that this server can understand.\n// Currently, it is always assumed that the server generates the latest version,\n// though this may be changed in the future to include a configurable version.\n//\n// # Version History\n//\n// 0: Original Raft library before versioning was added. 
The peers portion of\n//\n//\tthese snapshots is encoded in the legacy format which requires decodePeers\n//\tto parse. This version of snapshots should only be produced by the\n//\tunversioned Raft library.\n//\n// 1: New format which adds support for a full configuration structure and its\n//\n//\tassociated log index, with support for server IDs and non-voting server\n//\tmodes. To ease upgrades, this also includes the legacy peers structure but\n//\tthat will never be used by servers that understand version 1 snapshots.\n//\tSince the original Raft library didn't enforce any versioning, we must\n//\tinclude the legacy peers structure for this version, but we can deprecate\n//\tit in the next snapshot version.\ntype SnapshotVersion int\n\nconst (\n\t// SnapshotVersionMin is the minimum snapshot version\n\tSnapshotVersionMin SnapshotVersion = 0\n\t// SnapshotVersionMax is the maximum snapshot version\n\tSnapshotVersionMax = 1\n)\n\n// Config provides any necessary configuration for the Raft server.\ntype Config struct {\n\t// ProtocolVersion allows a Raft server to inter-operate with older\n\t// Raft servers running an older version of the code. This is used to\n\t// version the wire protocol as well as Raft-specific log entries that\n\t// the server uses when _speaking_ to other servers. There is currently\n\t// no auto-negotiation of versions so all servers must be manually\n\t// configured with compatible versions. 
See ProtocolVersionMin and\n\t// ProtocolVersionMax for the versions of the protocol that this server\n\t// can _understand_.\n\tProtocolVersion ProtocolVersion\n\n\t// HeartbeatTimeout specifies the time in follower state without contact\n\t// from a leader before we attempt an election.\n\tHeartbeatTimeout time.Duration\n\n\t// ElectionTimeout specifies the time in candidate state without contact\n\t// from a leader before we attempt an election.\n\tElectionTimeout time.Duration\n\n\t// CommitTimeout specifies the time without an Apply operation before the\n\t// leader sends an AppendEntry RPC to followers, to ensure a timely commit of\n\t// log entries.\n\t// Due to random staggering, may be delayed as much as 2x this value.\n\tCommitTimeout time.Duration\n\n\t// MaxAppendEntries controls the maximum number of append entries\n\t// to send at once. We want to strike a balance between efficiency\n\t// and avoiding waste if the follower is going to reject because of\n\t// an inconsistent log.\n\tMaxAppendEntries int\n\n\t// BatchApplyCh indicates whether we should buffer applyCh\n\t// to size MaxAppendEntries. This enables batch log commitment,\n\t// but breaks the timeout guarantee on Apply. Specifically,\n\t// a log can be added to the applyCh buffer but not actually be\n\t// processed until after the specified timeout.\n\tBatchApplyCh bool\n\n\t// If we are a member of a cluster, and RemovePeer is invoked for the\n\t// local node, then we forget all peers and transition into the follower state.\n\t// If ShutdownOnRemove is set, we additionally shut down Raft. Otherwise,\n\t// we can become a leader of a cluster containing only this node.\n\tShutdownOnRemove bool\n\n\t// TrailingLogs controls how many logs we leave after a snapshot. This is used\n\t// so that we can quickly replay logs on a follower instead of being forced to\n\t// send an entire snapshot. 
The value passed here is the initial setting used.\n\t// This can be tuned during operation using ReloadConfig.\n\tTrailingLogs uint64\n\n\t// SnapshotInterval controls how often we check if we should perform a\n\t// snapshot. We randomly stagger between this value and 2x this value to avoid\n\t// the entire cluster from performing a snapshot at once. The value passed\n\t// here is the initial setting used. This can be tuned during operation using\n\t// ReloadConfig.\n\tSnapshotInterval time.Duration\n\n\t// SnapshotThreshold controls how many outstanding logs there must be before\n\t// we perform a snapshot. This is to prevent excessive snapshotting by\n\t// replaying a small set of logs instead. The value passed here is the initial\n\t// setting used. This can be tuned during operation using ReloadConfig.\n\tSnapshotThreshold uint64\n\n\t// LeaderLeaseTimeout is used to control how long the \"lease\" lasts\n\t// for being the leader without being able to contact a quorum\n\t// of nodes. If we reach this interval without contact, we will\n\t// step down as leader.\n\tLeaderLeaseTimeout time.Duration\n\n\t// LocalID is a unique ID for this server across all time. When running with\n\t// ProtocolVersion < 3, you must set this to be the same as the network\n\t// address of your transport.\n\tLocalID ServerID\n\n\t// NotifyCh is used to provide a channel that will be notified of leadership\n\t// changes. Raft will block writing to this channel, so it should either be\n\t// buffered or aggressively consumed.\n\tNotifyCh chan<- bool\n\n\t// LogOutput is used as a sink for logs, unless Logger is specified.\n\t// Defaults to os.Stderr.\n\tLogOutput io.Writer\n\n\t// LogLevel represents a log level. If the value does not match a known\n\t// logging level hclog.NoLevel is used.\n\tLogLevel string\n\n\t// Logger is a user-provided logger. 
If nil, a logger writing to\n\t// LogOutput with LogLevel is used.\n\tLogger hclog.Logger\n\n\t// NoSnapshotRestoreOnStart controls if raft will restore a snapshot to the\n\t// FSM on start. This is useful if your FSM recovers from other mechanisms\n\t// than raft snapshotting. Snapshot metadata will still be used to initialize\n\t// raft's configuration and index values.\n\tNoSnapshotRestoreOnStart bool\n\n\t// PreVoteDisabled deactivates the pre-vote feature when set to true\n\tPreVoteDisabled bool\n\n\t// NoLegacyTelemetry allows skipping the legacy metrics to avoid duplicates.\n\t// Legacy metrics are those that have `_peer_name` as a metric suffix instead of as labels.\n\t// e.g: raft_replication_heartbeat_peer0\n\tNoLegacyTelemetry bool\n\n\t// skipStartup allows NewRaft() to bypass all background work goroutines\n\tskipStartup bool\n}\n\nfunc (conf *Config) getOrCreateLogger() hclog.Logger {\n\tif conf.Logger != nil {\n\t\treturn conf.Logger\n\t}\n\tif conf.LogOutput == nil {\n\t\tconf.LogOutput = os.Stderr\n\t}\n\n\treturn hclog.New(&hclog.LoggerOptions{\n\t\tName:   \"raft\",\n\t\tLevel:  hclog.LevelFromString(conf.LogLevel),\n\t\tOutput: conf.LogOutput,\n\t})\n}\n\n// ReloadableConfig is the subset of Config that may be reconfigured during\n// runtime using raft.ReloadConfig. We choose to duplicate fields over embedding\n// or accepting a Config but only using specific fields to keep the API clear.\n// Reconfiguring some fields is potentially dangerous so we should only\n// selectively enable it for fields where that is allowed.\ntype ReloadableConfig struct {\n\t// TrailingLogs controls how many logs we leave after a snapshot. This is used\n\t// so that we can quickly replay logs on a follower instead of being forced to\n\t// send an entire snapshot. 
The value passed here updates the setting at runtime\n\t// which will take effect as soon as the next snapshot completes and truncation\n\t// occurs.\n\tTrailingLogs uint64\n\n\t// SnapshotInterval controls how often we check if we should perform a snapshot.\n\t// We randomly stagger between this value and 2x this value to avoid the entire\n\t// cluster from performing a snapshot at once.\n\tSnapshotInterval time.Duration\n\n\t// SnapshotThreshold controls how many outstanding logs there must be before\n\t// we perform a snapshot. This is to prevent excessive snapshots when we can\n\t// just replay a small set of logs.\n\tSnapshotThreshold uint64\n\n\t// HeartbeatTimeout specifies the time in follower state without\n\t// a leader before we attempt an election.\n\tHeartbeatTimeout time.Duration\n\n\t// ElectionTimeout specifies the time in candidate state without\n\t// a leader before we attempt an election.\n\tElectionTimeout time.Duration\n}\n\n// apply sets the reloadable fields on the passed Config to the values in\n// `ReloadableConfig`. 
It returns a copy of Config with the fields from this\n// ReloadableConfig set.\nfunc (rc *ReloadableConfig) apply(to Config) Config {\n\tto.TrailingLogs = rc.TrailingLogs\n\tto.SnapshotInterval = rc.SnapshotInterval\n\tto.SnapshotThreshold = rc.SnapshotThreshold\n\tto.HeartbeatTimeout = rc.HeartbeatTimeout\n\tto.ElectionTimeout = rc.ElectionTimeout\n\treturn to\n}\n\n// fromConfig copies the reloadable fields from the passed Config.\nfunc (rc *ReloadableConfig) fromConfig(from Config) {\n\trc.TrailingLogs = from.TrailingLogs\n\trc.SnapshotInterval = from.SnapshotInterval\n\trc.SnapshotThreshold = from.SnapshotThreshold\n\trc.HeartbeatTimeout = from.HeartbeatTimeout\n\trc.ElectionTimeout = from.ElectionTimeout\n}\n\n// DefaultConfig returns a Config with usable defaults.\nfunc DefaultConfig() *Config {\n\treturn &Config{\n\t\tProtocolVersion:    ProtocolVersionMax,\n\t\tHeartbeatTimeout:   1000 * time.Millisecond,\n\t\tElectionTimeout:    1000 * time.Millisecond,\n\t\tCommitTimeout:      50 * time.Millisecond,\n\t\tMaxAppendEntries:   64,\n\t\tShutdownOnRemove:   true,\n\t\tTrailingLogs:       10240,\n\t\tSnapshotInterval:   120 * time.Second,\n\t\tSnapshotThreshold:  8192,\n\t\tLeaderLeaseTimeout: 500 * time.Millisecond,\n\t\tLogLevel:           \"DEBUG\",\n\t}\n}\n\n// ValidateConfig is used to validate a sane configuration\nfunc ValidateConfig(config *Config) error {\n\t// We don't actually support running as 0 in the library any more, but\n\t// we do understand it.\n\tprotocolMin := ProtocolVersionMin\n\tif protocolMin == 0 {\n\t\tprotocolMin = 1\n\t}\n\tif config.ProtocolVersion < protocolMin ||\n\t\tconfig.ProtocolVersion > ProtocolVersionMax {\n\t\treturn fmt.Errorf(\"ProtocolVersion %d must be >= %d and <= %d\",\n\t\t\tconfig.ProtocolVersion, protocolMin, ProtocolVersionMax)\n\t}\n\tif len(config.LocalID) == 0 {\n\t\treturn fmt.Errorf(\"LocalID cannot be empty\")\n\t}\n\tif config.HeartbeatTimeout < 5*time.Millisecond {\n\t\treturn 
fmt.Errorf(\"HeartbeatTimeout is too low\")\n\t}\n\tif config.ElectionTimeout < 5*time.Millisecond {\n\t\treturn fmt.Errorf(\"ElectionTimeout is too low\")\n\t}\n\tif config.CommitTimeout < time.Millisecond {\n\t\treturn fmt.Errorf(\"CommitTimeout is too low\")\n\t}\n\tif config.MaxAppendEntries <= 0 {\n\t\treturn fmt.Errorf(\"MaxAppendEntries must be positive\")\n\t}\n\tif config.MaxAppendEntries > 1024 {\n\t\treturn fmt.Errorf(\"MaxAppendEntries is too large\")\n\t}\n\tif config.SnapshotInterval < 5*time.Millisecond {\n\t\treturn fmt.Errorf(\"SnapshotInterval is too low\")\n\t}\n\tif config.LeaderLeaseTimeout < 5*time.Millisecond {\n\t\treturn fmt.Errorf(\"LeaderLeaseTimeout is too low\")\n\t}\n\tif config.LeaderLeaseTimeout > config.HeartbeatTimeout {\n\t\treturn fmt.Errorf(\"LeaderLeaseTimeout (%s) cannot be larger than heartbeat timeout (%s)\", config.LeaderLeaseTimeout, config.HeartbeatTimeout)\n\t}\n\tif config.ElectionTimeout < config.HeartbeatTimeout {\n\t\treturn fmt.Errorf(\"ElectionTimeout (%s) must be equal or greater than Heartbeat Timeout (%s)\", config.ElectionTimeout, config.HeartbeatTimeout)\n\t}\n\treturn nil\n}\n"
  },
  {
    "path": "configuration.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport \"fmt\"\n\n// ServerSuffrage determines whether a Server in a Configuration gets a vote.\ntype ServerSuffrage int\n\n// Note: Don't renumber these, since the numbers are written into the log.\nconst (\n\t// Voter is a server whose vote is counted in elections and whose match index\n\t// is used in advancing the leader's commit index.\n\tVoter ServerSuffrage = iota\n\t// Nonvoter is a server that receives log entries but is not considered for\n\t// elections or commitment purposes.\n\tNonvoter\n\t// Staging is a server that acts like a Nonvoter. A configuration change\n\t// with a ConfigurationChangeCommand of Promote can change a Staging server\n\t// into a Voter.\n\t// Deprecated: use Nonvoter instead.\n\tStaging\n)\n\nfunc (s ServerSuffrage) String() string {\n\tswitch s {\n\tcase Voter:\n\t\treturn \"Voter\"\n\tcase Nonvoter:\n\t\treturn \"Nonvoter\"\n\tcase Staging:\n\t\treturn \"Staging\"\n\t}\n\treturn \"ServerSuffrage\"\n}\n\n// ConfigurationStore provides an interface that can optionally be implemented by FSMs\n// to store configuration updates made in the replicated log. In general this is only\n// necessary for FSMs that mutate durable state directly instead of applying changes\n// in memory and snapshotting periodically. By storing configuration changes, the\n// persistent FSM state can behave as a complete snapshot, and be able to recover\n// without an external snapshot just for persisting the raft configuration.\ntype ConfigurationStore interface {\n\t// ConfigurationStore is a superset of the FSM functionality\n\tFSM\n\n\t// StoreConfiguration is invoked once a log entry containing a configuration\n\t// change is committed. 
It takes the index at which the configuration was\n\t// written and the configuration value.\n\tStoreConfiguration(index uint64, configuration Configuration)\n}\n\n// ServerID is a unique string identifying a server for all time.\ntype ServerID string\n\n// ServerAddress is a network address for a server that a transport can contact.\ntype ServerAddress string\n\n// Server tracks the information about a single server in a configuration.\ntype Server struct {\n\t// Suffrage determines whether the server gets a vote.\n\tSuffrage ServerSuffrage\n\t// ID is a unique string identifying this server for all time.\n\tID ServerID\n\t// Address is its network address that a transport can contact.\n\tAddress ServerAddress\n}\n\n// Configuration tracks which servers are in the cluster, and whether they have\n// votes. This should include the local server, if it's a member of the cluster.\n// The servers are listed no particular order, but each should only appear once.\n// These entries are appended to the log during membership changes.\ntype Configuration struct {\n\tServers []Server\n}\n\n// Clone makes a deep copy of a Configuration.\nfunc (c *Configuration) Clone() (copy Configuration) {\n\tcopy.Servers = append(copy.Servers, c.Servers...)\n\treturn\n}\n\n// ConfigurationChangeCommand is the different ways to change the cluster\n// configuration.\ntype ConfigurationChangeCommand uint8\n\nconst (\n\t// AddVoter adds a server with Suffrage of Voter.\n\tAddVoter ConfigurationChangeCommand = iota\n\t// AddNonvoter makes a server Nonvoter unless its Staging or Voter.\n\tAddNonvoter\n\t// DemoteVoter makes a server Nonvoter unless its absent.\n\tDemoteVoter\n\t// RemoveServer removes a server entirely from the cluster membership.\n\tRemoveServer\n\t// Promote changes a server from Staging to Voter. 
The command will be a\n\t// no-op if the server is not Staging.\n\t// Deprecated: use AddVoter instead.\n\tPromote\n\t// AddStaging makes a server a Voter.\n\t// Deprecated: AddStaging was actually AddVoter. Use AddVoter instead.\n\tAddStaging = 0 // explicit 0 to preserve the old value.\n)\n\nfunc (c ConfigurationChangeCommand) String() string {\n\tswitch c {\n\tcase AddVoter:\n\t\treturn \"AddVoter\"\n\tcase AddNonvoter:\n\t\treturn \"AddNonvoter\"\n\tcase DemoteVoter:\n\t\treturn \"DemoteVoter\"\n\tcase RemoveServer:\n\t\treturn \"RemoveServer\"\n\tcase Promote:\n\t\treturn \"Promote\"\n\t}\n\treturn \"ConfigurationChangeCommand\"\n}\n\n// configurationChangeRequest describes a change that a leader would like to\n// make to its current configuration. It's used only within a single server\n// (never serialized into the log), as part of `configurationChangeFuture`.\ntype configurationChangeRequest struct {\n\tcommand       ConfigurationChangeCommand\n\tserverID      ServerID\n\tserverAddress ServerAddress // only present for AddVoter, AddNonvoter\n\t// prevIndex, if nonzero, is the index of the only configuration upon which\n\t// this change may be applied; if another configuration entry has been\n\t// added in the meantime, this request will fail.\n\tprevIndex uint64\n}\n\n// configurations is state tracked on every server about its Configurations.\n// Note that, per Diego's dissertation, there can be at most one uncommitted\n// configuration at a time (the next configuration may not be created until the\n// prior one has been committed).\n//\n// One downside to storing just two configurations is that if you try to take a\n// snapshot when your state machine hasn't yet applied the committedIndex, we\n// have no record of the configuration that would logically fit into that\n// snapshot. We disallow snapshots in that case now. 
An alternative approach,\n// which LogCabin uses, is to track every configuration change in the\n// log.\ntype configurations struct {\n\t// committed is the latest configuration in the log/snapshot that has been\n\t// committed (the one with the largest index).\n\tcommitted Configuration\n\t// committedIndex is the log index where 'committed' was written.\n\tcommittedIndex uint64\n\t// latest is the latest configuration in the log/snapshot (may be committed\n\t// or uncommitted)\n\tlatest Configuration\n\t// latestIndex is the log index where 'latest' was written.\n\tlatestIndex uint64\n}\n\n// Clone makes a deep copy of a configurations object.\nfunc (c *configurations) Clone() (copy configurations) {\n\tcopy.committed = c.committed.Clone()\n\tcopy.committedIndex = c.committedIndex\n\tcopy.latest = c.latest.Clone()\n\tcopy.latestIndex = c.latestIndex\n\treturn\n}\n\n// hasVote returns true if the server identified by 'id' is a Voter in the\n// provided Configuration.\nfunc hasVote(configuration Configuration, id ServerID) bool {\n\tfor _, server := range configuration.Servers {\n\t\tif server.ID == id {\n\t\t\treturn server.Suffrage == Voter\n\t\t}\n\t}\n\treturn false\n}\n\n// inConfiguration returns true if the server identified by 'id' is in the\n// provided Configuration.\nfunc inConfiguration(configuration Configuration, id ServerID) bool {\n\tfor _, server := range configuration.Servers {\n\t\tif server.ID == id {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n\n// checkConfiguration tests a cluster membership configuration for common\n// errors.\nfunc checkConfiguration(configuration Configuration) error {\n\tidSet := make(map[ServerID]bool)\n\taddressSet := make(map[ServerAddress]bool)\n\tvar voters int\n\tfor _, server := range configuration.Servers {\n\t\tif server.ID == \"\" {\n\t\t\treturn fmt.Errorf(\"empty ID in configuration: %v\", configuration)\n\t\t}\n\t\tif server.Address == \"\" {\n\t\t\treturn fmt.Errorf(\"empty address in configuration: 
%v\", server)\n\t\t}\n\t\tif idSet[server.ID] {\n\t\t\treturn fmt.Errorf(\"found duplicate ID in configuration: %v\", server.ID)\n\t\t}\n\t\tidSet[server.ID] = true\n\t\tif addressSet[server.Address] {\n\t\t\treturn fmt.Errorf(\"found duplicate address in configuration: %v\", server.Address)\n\t\t}\n\t\taddressSet[server.Address] = true\n\t\tif server.Suffrage == Voter {\n\t\t\tvoters++\n\t\t}\n\t}\n\tif voters == 0 {\n\t\treturn fmt.Errorf(\"need at least one voter in configuration: %v\", configuration)\n\t}\n\treturn nil\n}\n\n// nextConfiguration generates a new Configuration from the current one and a\n// configuration change request. It's split from appendConfigurationEntry so\n// that it can be unit tested easily.\nfunc nextConfiguration(current Configuration, currentIndex uint64, change configurationChangeRequest) (Configuration, error) {\n\tif change.prevIndex > 0 && change.prevIndex != currentIndex {\n\t\treturn Configuration{}, fmt.Errorf(\"configuration changed since %v (latest is %v)\", change.prevIndex, currentIndex)\n\t}\n\n\tconfiguration := current.Clone()\n\tswitch change.command {\n\tcase AddVoter:\n\t\tnewServer := Server{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       change.serverID,\n\t\t\tAddress:  change.serverAddress,\n\t\t}\n\t\tfound := false\n\t\tfor i, server := range configuration.Servers {\n\t\t\tif server.ID == change.serverID {\n\t\t\t\tif server.Suffrage == Voter {\n\t\t\t\t\tconfiguration.Servers[i].Address = change.serverAddress\n\t\t\t\t} else {\n\t\t\t\t\tconfiguration.Servers[i] = newServer\n\t\t\t\t}\n\t\t\t\tfound = true\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t\tif !found {\n\t\t\tconfiguration.Servers = append(configuration.Servers, newServer)\n\t\t}\n\tcase AddNonvoter:\n\t\tnewServer := Server{\n\t\t\tSuffrage: Nonvoter,\n\t\t\tID:       change.serverID,\n\t\t\tAddress:  change.serverAddress,\n\t\t}\n\t\tfound := false\n\t\tfor i, server := range configuration.Servers {\n\t\t\tif server.ID == change.serverID {\n\t\t\t\tif 
server.Suffrage != Nonvoter {\n\t\t\t\t\tconfiguration.Servers[i].Address = change.serverAddress\n\t\t\t\t} else {\n\t\t\t\t\tconfiguration.Servers[i] = newServer\n\t\t\t\t}\n\t\t\t\tfound = true\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t\tif !found {\n\t\t\tconfiguration.Servers = append(configuration.Servers, newServer)\n\t\t}\n\tcase DemoteVoter:\n\t\tfor i, server := range configuration.Servers {\n\t\t\tif server.ID == change.serverID {\n\t\t\t\tconfiguration.Servers[i].Suffrage = Nonvoter\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\tcase RemoveServer:\n\t\tfor i, server := range configuration.Servers {\n\t\t\tif server.ID == change.serverID {\n\t\t\t\tconfiguration.Servers = append(configuration.Servers[:i], configuration.Servers[i+1:]...)\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\tcase Promote:\n\t\tfor i, server := range configuration.Servers {\n\t\t\tif server.ID == change.serverID && server.Suffrage == Staging {\n\t\t\t\tconfiguration.Servers[i].Suffrage = Voter\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t}\n\n\t// Make sure we didn't do something bad like remove the last voter\n\tif err := checkConfiguration(configuration); err != nil {\n\t\treturn Configuration{}, err\n\t}\n\n\treturn configuration, nil\n}\n\n// encodePeers is used to serialize a Configuration into the old peers format.\n// This is here for backwards compatibility when operating with a mix of old\n// servers and should be removed once we deprecate support for protocol version 1.\nfunc encodePeers(configuration Configuration, trans Transport) []byte {\n\t// Gather up all the voters, other suffrage types are not supported by\n\t// this data format.\n\tvar encPeers [][]byte\n\tfor _, server := range configuration.Servers {\n\t\tif server.Suffrage == Voter {\n\t\t\tencPeers = append(encPeers, trans.EncodePeer(server.ID, server.Address))\n\t\t}\n\t}\n\n\t// Encode the entire array.\n\tbuf, err := encodeMsgPack(encPeers)\n\tif err != nil {\n\t\tpanic(fmt.Errorf(\"failed to encode peers: %v\", err))\n\t}\n\n\treturn 
buf.Bytes()\n}\n\n// decodePeers is used to deserialize an old list of peers into a Configuration.\n// This is here for backwards compatibility with old log entries and snapshots;\n// it should be removed eventually.\nfunc decodePeers(buf []byte, trans Transport) (Configuration, error) {\n\t// Decode the buffer first.\n\tvar encPeers [][]byte\n\tif err := decodeMsgPack(buf, &encPeers); err != nil {\n\t\treturn Configuration{}, fmt.Errorf(\"failed to decode peers: %v\", err)\n\t}\n\n\t// Deserialize each peer.\n\tvar servers []Server\n\tfor _, enc := range encPeers {\n\t\tp := trans.DecodePeer(enc)\n\t\tservers = append(servers, Server{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(p),\n\t\t\tAddress:  p,\n\t\t})\n\t}\n\n\treturn Configuration{Servers: servers}, nil\n}\n\n// EncodeConfiguration serializes a Configuration using MsgPack, or panics on\n// errors.\nfunc EncodeConfiguration(configuration Configuration) []byte {\n\tbuf, err := encodeMsgPack(configuration)\n\tif err != nil {\n\t\tpanic(fmt.Errorf(\"failed to encode configuration: %v\", err))\n\t}\n\treturn buf.Bytes()\n}\n\n// DecodeConfiguration deserializes a Configuration using MsgPack, or panics on\n// errors.\nfunc DecodeConfiguration(buf []byte) Configuration {\n\tvar configuration Configuration\n\tif err := decodeMsgPack(buf, &configuration); err != nil {\n\t\tpanic(fmt.Errorf(\"failed to decode configuration: %v\", err))\n\t}\n\treturn configuration\n}\n"
  },
  {
    "path": "configuration_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"reflect\"\n\t\"strings\"\n\t\"testing\"\n\n\t\"github.com/stretchr/testify/require\"\n)\n\nvar sampleConfiguration = Configuration{\n\tServers: []Server{\n\t\t{\n\t\t\tSuffrage: Nonvoter,\n\t\t\tID:       ServerID(\"id0\"),\n\t\t\tAddress:  ServerAddress(\"addr0\"),\n\t\t},\n\t\t{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(\"id1\"),\n\t\t\tAddress:  ServerAddress(\"addr1\"),\n\t\t},\n\t\t{\n\t\t\tSuffrage: Staging,\n\t\t\tID:       ServerID(\"id2\"),\n\t\t\tAddress:  ServerAddress(\"addr2\"),\n\t\t},\n\t},\n}\n\nfunc TestConfiguration_Configuration_Clone(t *testing.T) {\n\tcloned := sampleConfiguration.Clone()\n\tif !reflect.DeepEqual(sampleConfiguration, cloned) {\n\t\tt.Fatalf(\"mismatch %v %v\", sampleConfiguration, cloned)\n\t}\n\tcloned.Servers[1].ID = \"scribble\"\n\tif sampleConfiguration.Servers[1].ID == \"scribble\" {\n\t\tt.Fatalf(\"cloned configuration shouldn't alias Servers\")\n\t}\n}\n\nfunc TestConfiguration_configurations_Clone(t *testing.T) {\n\tconfiguration := configurations{\n\t\tcommitted:      sampleConfiguration,\n\t\tcommittedIndex: 1,\n\t\tlatest:         sampleConfiguration,\n\t\tlatestIndex:    2,\n\t}\n\tcloned := configuration.Clone()\n\tif !reflect.DeepEqual(configuration, cloned) {\n\t\tt.Fatalf(\"mismatch %v %v\", configuration, cloned)\n\t}\n\tcloned.committed.Servers[1].ID = \"scribble\"\n\tcloned.latest.Servers[1].ID = \"scribble\"\n\tif configuration.committed.Servers[1].ID == \"scribble\" ||\n\t\tconfiguration.latest.Servers[1].ID == \"scribble\" {\n\t\tt.Fatalf(\"cloned configuration shouldn't alias Servers\")\n\t}\n}\n\nfunc TestConfiguration_hasVote(t *testing.T) {\n\tif hasVote(sampleConfiguration, \"id0\") {\n\t\tt.Fatalf(\"id0 should not have vote\")\n\t}\n\tif !hasVote(sampleConfiguration, \"id1\") {\n\t\tt.Fatalf(\"id1 should have vote\")\n\t}\n\tif hasVote(sampleConfiguration, 
\"id2\") {\n\t\tt.Fatalf(\"id2 should not have vote\")\n\t}\n\tif hasVote(sampleConfiguration, \"someotherid\") {\n\t\tt.Fatalf(\"someotherid should not have vote\")\n\t}\n}\n\nfunc TestConfiguration_checkConfiguration(t *testing.T) {\n\tvar configuration Configuration\n\tif checkConfiguration(configuration) == nil {\n\t\tt.Fatalf(\"empty configuration should be error\")\n\t}\n\n\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\tSuffrage: Nonvoter,\n\t\tID:       ServerID(\"id0\"),\n\t\tAddress:  ServerAddress(\"addr0\"),\n\t})\n\tif checkConfiguration(configuration) == nil {\n\t\tt.Fatalf(\"lack of voter should be error\")\n\t}\n\n\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\tSuffrage: Voter,\n\t\tID:       ServerID(\"id1\"),\n\t\tAddress:  ServerAddress(\"addr1\"),\n\t})\n\tif err := checkConfiguration(configuration); err != nil {\n\t\tt.Fatalf(\"should be OK: %v\", err)\n\t}\n\n\tconfiguration.Servers[1].ID = \"id0\"\n\terr := checkConfiguration(configuration)\n\tif err == nil {\n\t\tt.Fatalf(\"duplicate ID should be error\")\n\t}\n\tif !strings.Contains(err.Error(), \"duplicate ID\") {\n\t\tt.Fatalf(\"unexpected error: %v\", err)\n\t}\n\tconfiguration.Servers[1].ID = \"id1\"\n\n\tconfiguration.Servers[1].Address = \"addr0\"\n\terr = checkConfiguration(configuration)\n\tif err == nil {\n\t\tt.Fatalf(\"duplicate address should be error\")\n\t}\n\tif !strings.Contains(err.Error(), \"duplicate address\") {\n\t\tt.Fatalf(\"unexpected error: %v\", err)\n\t}\n}\n\nvar singleServer = Configuration{\n\tServers: []Server{\n\t\t{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(\"id1\"),\n\t\t\tAddress:  ServerAddress(\"addr1x\"),\n\t\t},\n\t},\n}\n\nvar oneOfEach = Configuration{\n\tServers: []Server{\n\t\t{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(\"id1\"),\n\t\t\tAddress:  ServerAddress(\"addr1x\"),\n\t\t},\n\t\t{\n\t\t\tSuffrage: Staging,\n\t\t\tID:       ServerID(\"id2\"),\n\t\t\tAddress:  
ServerAddress(\"addr2x\"),\n\t\t},\n\t\t{\n\t\t\tSuffrage: Nonvoter,\n\t\t\tID:       ServerID(\"id3\"),\n\t\t\tAddress:  ServerAddress(\"addr3x\"),\n\t\t},\n\t},\n}\n\nvar voterPair = Configuration{\n\tServers: []Server{\n\t\t{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(\"id1\"),\n\t\t\tAddress:  ServerAddress(\"addr1x\"),\n\t\t},\n\t\t{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(\"id2\"),\n\t\t\tAddress:  ServerAddress(\"addr2x\"),\n\t\t},\n\t},\n}\n\nvar nextConfigurationTests = []struct {\n\tcurrent  Configuration\n\tcommand  ConfigurationChangeCommand\n\tserverID int\n\tnext     string\n}{\n\t// AddStaging: was missing.\n\t{Configuration{}, AddStaging, 1, \"{[{Voter id1 addr1}]}\"},\n\t{singleServer, AddStaging, 2, \"{[{Voter id1 addr1x} {Voter id2 addr2}]}\"},\n\t// AddStaging: was Voter.\n\t{singleServer, AddStaging, 1, \"{[{Voter id1 addr1}]}\"},\n\t// AddStaging: was Staging.\n\t{oneOfEach, AddStaging, 2, \"{[{Voter id1 addr1x} {Voter id2 addr2} {Nonvoter id3 addr3x}]}\"},\n\t// AddStaging: was Nonvoter.\n\t{oneOfEach, AddStaging, 3, \"{[{Voter id1 addr1x} {Staging id2 addr2x} {Voter id3 addr3}]}\"},\n\n\t// AddVoter: was missing.\n\t{Configuration{}, AddVoter, 1, \"{[{Voter id1 addr1}]}\"},\n\t{singleServer, AddVoter, 2, \"{[{Voter id1 addr1x} {Voter id2 addr2}]}\"},\n\t// AddVoter: was Voter.\n\t{singleServer, AddVoter, 1, \"{[{Voter id1 addr1}]}\"},\n\t// AddVoter: was Staging.\n\t{oneOfEach, AddVoter, 2, \"{[{Voter id1 addr1x} {Voter id2 addr2} {Nonvoter id3 addr3x}]}\"},\n\t// AddVoter: was Nonvoter.\n\t{oneOfEach, AddVoter, 3, \"{[{Voter id1 addr1x} {Staging id2 addr2x} {Voter id3 addr3}]}\"},\n\n\t// AddNonvoter: was missing.\n\t{singleServer, AddNonvoter, 2, \"{[{Voter id1 addr1x} {Nonvoter id2 addr2}]}\"},\n\t// AddNonvoter: was Voter.\n\t{singleServer, AddNonvoter, 1, \"{[{Voter id1 addr1}]}\"},\n\t// AddNonvoter: was Staging.\n\t{oneOfEach, AddNonvoter, 2, \"{[{Voter id1 addr1x} {Staging id2 addr2} {Nonvoter id3 addr3x}]}\"},\n\t// 
AddNonvoter: was Nonvoter.\n\t{oneOfEach, AddNonvoter, 3, \"{[{Voter id1 addr1x} {Staging id2 addr2x} {Nonvoter id3 addr3}]}\"},\n\n\t// DemoteVoter: was missing.\n\t{singleServer, DemoteVoter, 2, \"{[{Voter id1 addr1x}]}\"},\n\t// DemoteVoter: was Voter.\n\t{voterPair, DemoteVoter, 2, \"{[{Voter id1 addr1x} {Nonvoter id2 addr2x}]}\"},\n\t// DemoteVoter: was Staging.\n\t{oneOfEach, DemoteVoter, 2, \"{[{Voter id1 addr1x} {Nonvoter id2 addr2x} {Nonvoter id3 addr3x}]}\"},\n\t// DemoteVoter: was Nonvoter.\n\t{oneOfEach, DemoteVoter, 3, \"{[{Voter id1 addr1x} {Staging id2 addr2x} {Nonvoter id3 addr3x}]}\"},\n\n\t// RemoveServer: was missing.\n\t{singleServer, RemoveServer, 2, \"{[{Voter id1 addr1x}]}\"},\n\t// RemoveServer: was Voter.\n\t{voterPair, RemoveServer, 2, \"{[{Voter id1 addr1x}]}\"},\n\t// RemoveServer: was Staging.\n\t{oneOfEach, RemoveServer, 2, \"{[{Voter id1 addr1x} {Nonvoter id3 addr3x}]}\"},\n\t// RemoveServer: was Nonvoter.\n\t{oneOfEach, RemoveServer, 3, \"{[{Voter id1 addr1x} {Staging id2 addr2x}]}\"},\n\n\t// Promote: was missing.\n\t{singleServer, Promote, 2, \"{[{Voter id1 addr1x}]}\"},\n\t// Promote: was Voter.\n\t{singleServer, Promote, 1, \"{[{Voter id1 addr1x}]}\"},\n\t// Promote: was Staging.\n\t{oneOfEach, Promote, 2, \"{[{Voter id1 addr1x} {Voter id2 addr2x} {Nonvoter id3 addr3x}]}\"},\n\t// Promote: was Nonvoter.\n\t{oneOfEach, Promote, 3, \"{[{Voter id1 addr1x} {Staging id2 addr2x} {Nonvoter id3 addr3x}]}\"},\n}\n\nfunc TestConfiguration_nextConfiguration_table(t *testing.T) {\n\tfor i, tt := range nextConfigurationTests {\n\t\treq := configurationChangeRequest{\n\t\t\tcommand:       tt.command,\n\t\t\tserverID:      ServerID(fmt.Sprintf(\"id%d\", tt.serverID)),\n\t\t\tserverAddress: ServerAddress(fmt.Sprintf(\"addr%d\", tt.serverID)),\n\t\t}\n\t\tnext, err := nextConfiguration(tt.current, 1, req)\n\t\tif err != nil {\n\t\t\tt.Errorf(\"nextConfiguration %d should have succeeded, got %v\", i, err)\n\t\t\tcontinue\n\t\t}\n\t\tif 
fmt.Sprintf(\"%v\", next) != tt.next {\n\t\t\tt.Errorf(\"nextConfiguration %d returned %v, expected %s\", i, next, tt.next)\n\t\t\tcontinue\n\t\t}\n\t}\n}\n\nfunc TestConfiguration_nextConfiguration_prevIndex(t *testing.T) {\n\t// Stale prevIndex.\n\treq := configurationChangeRequest{\n\t\tcommand:       AddVoter,\n\t\tserverID:      ServerID(\"id1\"),\n\t\tserverAddress: ServerAddress(\"addr1\"),\n\t\tprevIndex:     1,\n\t}\n\t_, err := nextConfiguration(singleServer, 2, req)\n\tif err == nil || !strings.Contains(err.Error(), \"changed\") {\n\t\tt.Fatalf(\"nextConfiguration should have failed due to intervening configuration change\")\n\t}\n\n\t// Current prevIndex.\n\treq = configurationChangeRequest{\n\t\tcommand:       AddVoter,\n\t\tserverID:      ServerID(\"id2\"),\n\t\tserverAddress: ServerAddress(\"addr2\"),\n\t\tprevIndex:     2,\n\t}\n\t_, err = nextConfiguration(singleServer, 2, req)\n\tif err != nil {\n\t\tt.Fatalf(\"nextConfiguration should have succeeded, got %v\", err)\n\t}\n\n\t// Zero prevIndex.\n\treq = configurationChangeRequest{\n\t\tcommand:       AddVoter,\n\t\tserverID:      ServerID(\"id3\"),\n\t\tserverAddress: ServerAddress(\"addr3\"),\n\t\tprevIndex:     0,\n\t}\n\t_, err = nextConfiguration(singleServer, 2, req)\n\tif err != nil {\n\t\tt.Fatalf(\"nextConfiguration should have succeeded, got %v\", err)\n\t}\n}\n\nfunc TestConfiguration_nextConfiguration_checkConfiguration(t *testing.T) {\n\treq := configurationChangeRequest{\n\t\tcommand:       AddNonvoter,\n\t\tserverID:      ServerID(\"id1\"),\n\t\tserverAddress: ServerAddress(\"addr1\"),\n\t}\n\t_, err := nextConfiguration(Configuration{}, 1, req)\n\tif err == nil || !strings.Contains(err.Error(), \"at least one voter\") {\n\t\tt.Fatalf(\"nextConfiguration should have failed for not having a voter\")\n\t}\n}\n\nfunc TestConfiguration_encodeDecodePeers(t *testing.T) {\n\t// Set up configuration.\n\tvar configuration Configuration\n\tfor i := 0; i < 3; i++ {\n\t\taddress := 
NewInmemAddr()\n\t\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(address),\n\t\t\tAddress:  ServerAddress(address),\n\t\t})\n\t}\n\n\t// Encode into the old format.\n\t_, trans := NewInmemTransport(\"\")\n\tbuf := encodePeers(configuration, trans)\n\n\t// Decode from old format, as if reading an old log entry.\n\tdecoded, err := decodePeers(buf, trans)\n\trequire.NoError(t, err)\n\tif !reflect.DeepEqual(configuration, decoded) {\n\t\tt.Fatalf(\"mismatch %v %v\", configuration, decoded)\n\t}\n}\n\nfunc TestConfiguration_encodeDecodeConfiguration(t *testing.T) {\n\tdecoded := DecodeConfiguration(EncodeConfiguration(sampleConfiguration))\n\tif !reflect.DeepEqual(sampleConfiguration, decoded) {\n\t\tt.Fatalf(\"mismatch %v %v\", sampleConfiguration, decoded)\n\t}\n}\n"
  },
  {
    "path": "discard_snapshot.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"io\"\n)\n\n// DiscardSnapshotStore is used to successfully snapshot while\n// always discarding the snapshot. This is useful for when the\n// log should be truncated but no snapshot should be retained.\n// This should never be used for production use, and is only\n// suitable for testing.\ntype DiscardSnapshotStore struct{}\n\n// DiscardSnapshotSink is used to fulfill the SnapshotSink interface\n// while always discarding the snapshot. This is useful for when the log\n// should be truncated but no snapshot should be retained. This\n// should never be used for production use, and is only suitable\n// for testing.\ntype DiscardSnapshotSink struct{}\n\n// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.\nfunc NewDiscardSnapshotStore() *DiscardSnapshotStore {\n\treturn &DiscardSnapshotStore{}\n}\n\n// Create returns a valid type implementing the SnapshotSink which\n// always discards the snapshot.\nfunc (d *DiscardSnapshotStore) Create(version SnapshotVersion, index, term uint64,\n\tconfiguration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {\n\treturn &DiscardSnapshotSink{}, nil\n}\n\n// List returns successfully with a nil for []*SnapshotMeta.\nfunc (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {\n\treturn nil, nil\n}\n\n// Open returns an error since the DiscardSnapshotStore does not\n// support opening snapshots.\nfunc (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {\n\treturn nil, nil, fmt.Errorf(\"open is not supported\")\n}\n\n// Write returns successfully with the length of the input byte slice\n// to satisfy the WriteCloser interface\nfunc (d *DiscardSnapshotSink) Write(b []byte) (int, error) {\n\treturn len(b), nil\n}\n\n// Close returns a nil error\nfunc (d *DiscardSnapshotSink) Close() error {\n\treturn nil\n}\n\n// ID returns \"discard\" for DiscardSnapshotSink\nfunc (d *DiscardSnapshotSink) ID() string {\n\treturn \"discard\"\n}\n\n// Cancel returns successfully with a nil error\nfunc (d *DiscardSnapshotSink) Cancel() error {\n\treturn nil\n}\n"
  },
  {
    "path": "discard_snapshot_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport \"testing\"\n\nfunc TestDiscardSnapshotStoreImpl(t *testing.T) {\n\tvar impl interface{} = &DiscardSnapshotStore{}\n\tif _, ok := impl.(SnapshotStore); !ok {\n\t\tt.Fatalf(\"DiscardSnapshotStore not a SnapshotStore\")\n\t}\n}\n\nfunc TestDiscardSnapshotSinkImpl(t *testing.T) {\n\tvar impl interface{} = &DiscardSnapshotSink{}\n\tif _, ok := impl.(SnapshotSink); !ok {\n\t\tt.Fatalf(\"DiscardSnapshotSink not a SnapshotSink\")\n\t}\n}\n"
  },
  {
    "path": "docs/README.md",
    "content": "# Raft Developer Documentation\n\nThis documentation provides a high level introduction to the `hashicorp/raft`\nimplementation. The intended audience is anyone interested in understanding\nor contributing to the code.\n\n## Contents\n\n1. [Terminology](#terminology)\n2. [Operations](#operations)\n   1. [Apply](./apply.md)\n3. [Threads](#threads)\n4. [Divergence](./divergence.md)\n\n\n## Terminology\n\nThis documentation uses the following terms as defined.\n\n* **Cluster** - the set of peers in the raft configuration\n* **Peer** - a node that participates in the consensus protocol using `hashicorp/raft`. A\n  peer may be in one of the following states: **follower**, **candidate**, or **leader**.\n* **Log** - the full set of log entries.\n* **Log Entry** - an entry in the log. Each entry has an index that is used to order it\n  relative to other log entries.\n  * **Committed** -  A log entry is considered committed if it is safe for that entry to be\n    applied to state machines. A log entry is committed once the leader that created the\n    entry has replicated it on a majority of the peers. A peer has successfully\n    replicated the entry once it is persisted.\n  * **Applied** - log entry applied to the state machine (FSM)\n* **Term** - raft divides time into terms of arbitrary length. Terms are numbered with\n  consecutive integers. Each term begins with an election, in which one or more candidates\n  attempt to become leader. If a candidate wins the election, then it serves as leader for\n  the rest of the term. 
If the election ends with a split vote, the term will end with no\n  leader.\n* **FSM** - finite state machine, stores the cluster state\n* **Client** - the application that uses the `hashicorp/raft` library\n\n## Operations\n\n### Leader Write\n\nMost write operations must be performed on the leader.\n\n* RequestConfigChange - update the raft peer list configuration\n* Apply - apply a log entry to the log on a majority of peers, and the FSM. See [raft apply](apply.md) for more details.\n* Barrier - a special Apply that does not modify the FSM, used to wait for previous logs to be applied\n* LeadershipTransfer - stop accepting client requests, and tell a different peer to start a leadership election\n* Restore (Snapshot) - overwrite the cluster state with the contents of the snapshot (excluding cluster configuration)\n* VerifyLeader - send a heartbeat to all voters to confirm the peer is still the leader\n\n### Follower Write\n\n* BootstrapCluster - store the cluster configuration in the local log store\n\n\n### Read\n\nRead operations can be performed on a peer in any state.\n\n* AppliedIndex - get the index of the last log entry applied to the FSM\n* GetConfiguration - return the latest cluster configuration\n* LastContact - get the last time this peer made contact with the leader\n* LastIndex - get the index of the latest stored log entry\n* Leader - get the address of the peer that is currently the leader\n* Snapshot - snapshot the current state of the FSM into a file\n* State - return the state of the peer\n* Stats - return some stats about the peer and the cluster\n\n## Threads\n\nRaft uses the following threads to handle operations. The name of the thread is in bold,\nand a short description of the operation handled by the thread follows. 
The main thread is\nresponsible for handling many operations.\n\n* **run** (main thread) - different behaviour based on peer state\n   * follower\n      * processRPC (from rpcCh)\n         * AppendEntries\n         * RequestVote\n         * InstallSnapshot\n         * TimeoutNow\n      * liveBootstrap (from bootstrapCh)\n      * periodic heartbeatTimer (HeartbeatTimeout)\n   * candidate - starts an election for itself when called\n      * processRPC (from rpcCh) - same as follower\n      * acceptVote (from askPeerForVote)\n   * leader - first starts replication to all peers, and applies a Noop log to ensure the new leader has committed up to the commit index\n      * processRPC (from rpcCh) - same as follower, however we don’t actually expect to receive any RPCs other than a RequestVote\n      * leadershipTransfer (from leadershipTransferCh) - \n      * commit (from commitCh) -\n      * verifyLeader (from verifyCh) -\n      * user restore snapshot (from userRestoreCh) -\n      * changeConfig (from configurationChangeCh) -\n      * dispatchLogs (from applyCh) - handle client Raft.Apply requests by persisting logs to disk, and notifying replication goroutines to replicate the new logs\n      * checkLease (periodically LeaseTimeout) -\n* **runFSM** - has exclusive access to the FSM, all reads and writes must send a message to this thread. Commands:\n   * apply logs to the FSM, from the fsmMutateCh, from processLogs, from leaderLoop (leader) or appendEntries RPC (follower/candidate)\n   * restore a snapshot to the FSM, from the fsmMutateCh, from restoreUserSnapshot (leader) or installSnapshot RPC (follower/candidate)\n   * capture snapshot, from fsmSnapshotCh, from takeSnapshot (runSnapshot thread)\n* **runSnapshot** - handles the slower part of taking a snapshot. From a pointer captured by the FSM.Snapshot operation, this thread persists the snapshot by calling FSMSnapshot.Persist. 
Also calls compactLogs to delete old logs.\n   * periodically (SnapshotInterval) takeSnapshot for log compaction\n   * user snapshot, from userSnapshotCh, takeSnapshot to return to the user\n* **askPeerForVote (candidate only)** - short lived goroutine that synchronously sends a RequestVote RPC to all voting peers, and waits for the response. One goroutine per voting peer.\n* **replicate (leader only)** - long running goroutine that synchronously sends log entry AppendEntries RPCs to all peers. Also starts the heartbeat thread, and possibly the pipelineDecode thread. Runs sendLatestSnapshot when AppendEntries fails.\n   * **heartbeat (leader only)** - long running goroutine that synchronously sends heartbeat AppendEntries RPCs to all peers.\n   * **pipelineDecode (leader only)**\n"
  },
  {
    "path": "docs/apply.md",
    "content": "# Raft Apply\n\nApply is the primary operation provided by raft. A client calls `raft.Apply` to apply\na command to the FSM. A command will first be committed, i.e., durably stored on a\nquorum of raft nodes. Then, the committed command is applied to fsm.\n\nThis sequence diagram shows the steps involved in a `raft.Apply` operation. Each box\nacross the top is a separate thread. The name in the box identifies the state of the peer\n(leader or follower) and the thread (`<peer state>:<thread name>`). When there are\nmultiple copies of the thread, it is indicated with `(each peer)`.\n\n```mermaid\nsequenceDiagram\n   autonumber\n \n   participant client\n   participant leadermain as leader:main\n   participant leaderfsm as leader:fsm\n   participant leaderreplicate as leader:replicate (each peer)\n   participant followermain as follower:main (each peer)\n   participant followerfsm as follower:fsm (each peer)\n \n   client-)leadermain: applyCh to dispatchLogs\n   leadermain->>leadermain: store logs to disk\n \n   leadermain-)leaderreplicate: triggerCh\n   leaderreplicate-->>followermain: Transport.AppendEntries RPC\n \n   followermain->>followermain: store logs to disk\n \n   opt leader commit index is ahead of peer commit index\n       followermain-)followerfsm: fsmMutateCh <br>apply committed logs\n       followerfsm->>followerfsm: fsm.Apply\n   end\n \n   followermain-->>leaderreplicate: respond success=true\n   leaderreplicate->>leaderreplicate: update commitment\n \n   opt quorum commit index has increased\n       leaderreplicate-)leadermain: commitCh\n       leadermain-)leaderfsm: fsmMutateCh\n       leaderfsm->>leaderfsm: fsm.Apply\n       leaderfsm-)client: future.respond\n   end\n\n```\n\nFollowing is the description of each step as shown in the above diagram\n\n1. The raft node handles the `raft.Apply` call by creating a new log entry and send the entry\nto the `applyCh` channel.\n\n2. 
If the node is not a leader, the method will return an error of `ErrNotLeader`. Otherwise,\nthe main loop of the leader node calls `raft.dispatchLogs` to write the log entry locally.\n\n3. `raft.dispatchLogs` also sends a notification to the `f.triggerCh` of each follower (`map[ServerID]*followerReplication`) to start replicating log entries to the followers.\n\n4. For each follower, the leader has started a long running routine (`replicate`) to\nreplicate log entries. On receiving a notification on the `triggerCh`, the `replicate`\nroutine makes the `Transport.AppendEntries` RPC call to do the replication. The log entries\nto be replicated are from the follower's nextIndex to min(nextIndex + maxAppendEntries, \nleader's lastIndex). Another parameter to AppendEntries is the LeaderCommitIndex. Following\nare some examples:\n\n```\nAppendEntries(Log: 1..5, LeaderCommitIndex: 0)   // Replicating log entries 1..5, \n                                                 // the leader hasn't committed any log entry;\nAppendEntries(Log: 6..8, LeaderCommitIndex: 4)   // Replicating log entries 6..8,\n                                                 // log 0..4 are committed after the leader receives\n                                                 // a quorum of responses\nAppendEntries(Log: 9, LeaderCommitIndex: 8)      // Replicating log entry 9,\n                                                 // log 5..8 are committed.\nAppendEntries(Log: , LeaderCommitIndex: 9)       // no new log, bumping the commit index\n                                                 // to let the follower stay up to date with the\n                                                 // latest committed entries\n```\n\n5. The follower which receives the `appendEntries` RPC call invokes `raft.appendEntries` to handle\nthe request. It appends any new entries to the local log store.\n\n6. 
In the same method on the follower as step 5, if the LeaderCommitIndex > this follower's\ncommitIndex, the follower updates its commitIndex to min(LeaderCommitIndex, index of its last\nlog entry). In the first `AppendEntries` call of the above example, the follower won't\nupdate its commitIndex, because LeaderCommitIndex is 0. The last RPC call doesn't contain\nany new log, whereas the follower will update its commitIndex to 9.\n\nFurther, the follower starts `processLogs` to send all the committed entries that haven't been\napplied to fsm (`fsmMutateCh <- batch`). Otherwise (i.e., `commitIndex <= lastApplied`),\nthe appendEntries RPC call returns success.\n\nTherefore, it's possible that a very small window of time exists when all followers have\ncommitted the log to disk, the write has been realized in the FSM of the leader but the\nfollowers have not yet applied the log to their FSM.\n\n7. The peer applies the committed entries to the FSM.\n\n8. If all went well, the follower responds success (`resp.Success = true`) to the \n`appendEntries` RPC call.\n\n9. On receiving the successful response from `Transport.AppendEntries`, the leader needs to\nupdate the fsm based on the replicated log entries. Specifically, the leader finds the\nhighest log entry index that has been replicated to a quorum of the servers (\n`if quorumMatchIndex > c.commitIndex`), updates `commitIndex` to that index, and\nnotifies through the `commitCh` channel.\n\n10. The leader receives the notification on the `r.leaderState.commitCh` channel and starts\ngrouping the entries that can be applied to the fsm.\n\n11. `processLogs` applies all the committed entries that haven't been applied by batching the log entries and forwarding them through the `fsmMutateCh` channel to fsm.\n\n12. The actual place applying the committed log entries is in the main loop of `runFSM()`.\n\n13. 
After the log entries that contain the client request are applied to the fsm, the fsm\nmodule will set the responses to the client request (`req.future.respond(nil)`). From the\nclient's point of view, the future returned by `raft.Apply` should now be unblocked and\ncalls to `Error()` or `Response()` should return the data at this point.\n"
  },
  {
    "path": "docs/divergence.md",
    "content": "# HashiCorp Raft Divergences\n\nIn 2013 HashiCorp created its own Raft implementation based on the just\nreleased [Raft paper by Diego Ongaro and John Ousterhout][paper]. This was\nbefore [Diego's subsequent Raft dissertation][diss] in 2014, and long before\nthird party analyses such as Heidi Howard and Ittai Abraham's [Raft does not\nGuarantee Liveness in the face of Network Faults ][live]\nin 2020.[^1]\n\nHashiCorp's Raft library usage grew rapidly through its use in [Consul][consul]\nand [Nomad][nomad], and [later Vault][vault], in parallel with rapidly\nexpanding use in [etcd][etcd] and other implementations.\n\nThe explosion in activity between live systems and research led to a wide\ndivergence between not only implementations, but implementations and the\noriginal paper and dissertation.\n\nThis document attempts to explain where HashiCorp Raft either meaningfully diverges\nfrom the original Raft paper, or makes an implementation choice not explicitly\noutlined in the paper.\n\nThis is **not** expected to be a comprehensive list. Additions and edits are\nwelcome!\n\n## Asynchronous Heartbeats\n\nThe Raft paper defines heartbeats as empty AppendEntries RPCs which are sent by\nthe leader to each server after elections and during idle periods to prevent\nelection timeouts.\n\nHashiCorp Raft performs [heartbeating concurrently][async-heart] with other\nAppendEntries RPCs to avoid having to set the election timeout high enough to\naccount for the max acceptable disk operation. 
This allows the heartbeat\ntimeout to detect network partitions much more quickly without risking causing\nan election during periodic but ephemeral spikes in disk I/O latency.\n\n## Rejecting votes when there's already a leader\n\nThe [Raft does not Guarantee liveness][live] paper describes how certain\npartitions can prevent Raft clusters from making progress by causing continual\nelections.\n\nHashiCorp Raft implements the second of the suggested fixes from Howard's\npaper: rejecting vote request RPCs when there is already an established leader.\nThe paper defines this more precisely as:\n\n> ...ignore RequestVote RPCs if they have received an AppendEntries RPC from\n> the leader within the election timeout. \n\nThis approach is actually mentioned in the Cluster membership changes section\nof the original Raft paper, but that section explicitly excludes its use during \"normal\"\nelections:\n\n> To prevent this problem, servers disregard RequestVote RPCs when they believe\n> a current leader exists. Specifically, if a server receives a RequestVote RPC\n> within the minimum election timeout of hearing from a current leader, it does\n> not update its term or grant its vote.  This does not affect normal\n> elections...\n\nSo HashiCorp Raft follows the later paper's suggestion and ignores the original\npaper's exclusion of this logic during normal operation.\n\n## Pre-Vote\n\n[HashiCorp Raft implements the Pre-Vote extension][prevote-pr] defined in the\n[Raft dissertation][diss] (§9.6). Pre-Vote is an optimization where a candidate\ndiscovers whether its index is up to date and therefore able to win an election\nbefore incrementing its term and causing an election.\n\nThe Pre-Vote extension is enabled by default but may be disabled using the\n[Config.PreVoteDisabled][prevote-config] flag.\n\n## Leadership Transfer\n\n[HashiCorp Raft implements the Leadership Transfer extension][transleader-pr]\nas defined in the [Raft dissertation][diss] (§3.10). 
Leadership transfer is an\noptimization that allows the current leader to hand off leadership to a\nfollower to avoid waiting for the election timeout during regular operations\nsuch as restarts and upgrades.\n\nWhile leadership transfer is defined in the Raft dissertation, HashiCorp Raft\nextends the specification slightly because of _another_ divergence in HashiCorp\nRaft: [rejecting votes when there's already a\nleader](#rejecting-votes-when-theres-already-a-leader). Since other followers\nwould reject the intended new-leader's request for a vote, HashiCorp Raft adds\nan extra [`LeadershipTransfer` flag][transleader-flag] to override that\nbehavior in the case of leadership transfers.\n\nAll Raft members should support leadership transfers before a transfer is\nattempted. The feature is **not** enabled by default and must be explicitly\ntriggered at the application level. Consul was the first to implement this via\nmechanisms in their [API/CLI][transleader-cli] and [graceful agent\nshutdown][transleader-shutdown].\n\n[^1]: See https://raft.github.io/ for a comprehensive list of papers and\n    resources.\n\n[paper]: https://raft.github.io/raft.pdf\n[diss]: https://github.com/ongardie/dissertation#readme\n[live]: https://decentralizedthoughts.github.io/2020-12-12-raft-liveness-full-omission/\n[consul]: https://github.com/hashicorp/consul\n[nomad]: https://github.com/hashicorp/nomad\n[vault]: https://github.com/hashicorp/vault\n[etcd]: https://etcd.io/\n[async-heart]: https://github.com/hashicorp/raft/blob/v1.7.3/replication.go#L385-L387\n[prevote-pr]: https://github.com/hashicorp/raft/pull/530\n[prevote-config]: https://pkg.go.dev/github.com/hashicorp/raft#Config.PreVoteDisabled\n[transleader-pr]: https://github.com/hashicorp/raft/pull/306\n[transleader-flag]: https://pkg.go.dev/github.com/hashicorp/raft#RequestVoteRequest.LeadershipTransfer\n[transleader-cli]: https://github.com/hashicorp/consul/issues/5405\n[transleader-shutdown]: 
https://github.com/hashicorp/consul/issues/5406\n"
  },
  {
    "path": "file_snapshot.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bufio\"\n\t\"bytes\"\n\t\"encoding/json\"\n\t\"fmt\"\n\t\"hash\"\n\t\"hash/crc64\"\n\t\"io\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"runtime\"\n\t\"sort\"\n\t\"strings\"\n\t\"time\"\n\n\thclog \"github.com/hashicorp/go-hclog\"\n)\n\nconst (\n\ttestPath      = \"permTest\"\n\tsnapPath      = \"snapshots\"\n\tmetaFilePath  = \"meta.json\"\n\tstateFilePath = \"state.bin\"\n\ttmpSuffix     = \".tmp\"\n)\n\n// FileSnapshotStore implements the SnapshotStore interface and allows\n// snapshots to be made on the local disk.\ntype FileSnapshotStore struct {\n\tpath   string\n\tretain int\n\tlogger hclog.Logger\n\n\t// noSync, if true, skips crash-safe file fsync api calls.\n\t// It's a private field, only used in testing\n\tnoSync bool\n}\n\ntype snapMetaSlice []*fileSnapshotMeta\n\n// FileSnapshotSink implements SnapshotSink with a file.\ntype FileSnapshotSink struct {\n\tstore     *FileSnapshotStore\n\tlogger    hclog.Logger\n\tdir       string\n\tparentDir string\n\tmeta      fileSnapshotMeta\n\n\tnoSync bool\n\n\tstateFile *os.File\n\tstateHash hash.Hash64\n\tbuffered  *bufio.Writer\n\n\tclosed bool\n}\n\n// fileSnapshotMeta is stored on disk. We also put a CRC\n// on disk so that we can verify the snapshot.\ntype fileSnapshotMeta struct {\n\tSnapshotMeta\n\tCRC []byte\n}\n\n// bufferedFile is returned when we open a snapshot. This way\n// reads are buffered and the file still gets closed.\ntype bufferedFile struct {\n\tbh *bufio.Reader\n\tfh *os.File\n}\n\nfunc (b *bufferedFile) Read(p []byte) (n int, err error) {\n\treturn b.bh.Read(p)\n}\n\nfunc (b *bufferedFile) Close() error {\n\treturn b.fh.Close()\n}\n\n// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based\n// on a base directory. The `retain` parameter controls how many\n// snapshots are retained. 
Must be at least 1.\nfunc NewFileSnapshotStoreWithLogger(base string, retain int, logger hclog.Logger) (*FileSnapshotStore, error) {\n\tif retain < 1 {\n\t\treturn nil, fmt.Errorf(\"must retain at least one snapshot\")\n\t}\n\tif logger == nil {\n\t\tlogger = hclog.New(&hclog.LoggerOptions{\n\t\t\tName:   \"snapshot\",\n\t\t\tOutput: hclog.DefaultOutput,\n\t\t\tLevel:  hclog.DefaultLevel,\n\t\t})\n\t}\n\n\t// Ensure our path exists\n\tpath := filepath.Join(base, snapPath)\n\tif err := os.MkdirAll(path, 0o755); err != nil && !os.IsExist(err) {\n\t\treturn nil, fmt.Errorf(\"snapshot path not accessible: %v\", err)\n\t}\n\n\t// Setup the store\n\tstore := &FileSnapshotStore{\n\t\tpath:   path,\n\t\tretain: retain,\n\t\tlogger: logger,\n\t}\n\n\t// Do a permissions test\n\tif err := store.testPermissions(); err != nil {\n\t\treturn nil, fmt.Errorf(\"permissions test failed: %v\", err)\n\t}\n\treturn store, nil\n}\n\n// NewFileSnapshotStore creates a new FileSnapshotStore based\n// on a base directory. The `retain` parameter controls how many\n// snapshots are retained. 
Must be at least 1.\nfunc NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {\n\tif logOutput == nil {\n\t\tlogOutput = os.Stderr\n\t}\n\treturn NewFileSnapshotStoreWithLogger(base, retain, hclog.New(&hclog.LoggerOptions{\n\t\tName:   \"snapshot\",\n\t\tOutput: logOutput,\n\t\tLevel:  hclog.DefaultLevel,\n\t}))\n}\n\n// testPermissions tries to touch a file in our path to see if it works.\nfunc (f *FileSnapshotStore) testPermissions() error {\n\tpath := filepath.Join(f.path, testPath)\n\tfh, err := os.Create(path)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\tif err = fh.Close(); err != nil {\n\t\treturn err\n\t}\n\n\tif err = os.Remove(path); err != nil {\n\t\treturn err\n\t}\n\treturn nil\n}\n\n// snapshotName generates a name for the snapshot.\nfunc snapshotName(term, index uint64) string {\n\tnow := time.Now()\n\tmsec := now.UnixNano() / int64(time.Millisecond)\n\treturn fmt.Sprintf(\"%d-%d-%d\", term, index, msec)\n}\n\n// Create is used to start a new snapshot\nfunc (f *FileSnapshotStore) Create(version SnapshotVersion, index, term uint64,\n\tconfiguration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {\n\t// We only support version 1 snapshots at this time.\n\tif version != 1 {\n\t\treturn nil, fmt.Errorf(\"unsupported snapshot version %d\", version)\n\t}\n\n\t// Create a new path\n\tname := snapshotName(term, index)\n\tpath := filepath.Join(f.path, name+tmpSuffix)\n\tf.logger.Info(\"creating new snapshot\", \"path\", path)\n\n\t// Make the directory\n\tif err := os.MkdirAll(path, 0o755); err != nil {\n\t\tf.logger.Error(\"failed to make snapshot directly\", \"error\", err)\n\t\treturn nil, err\n\t}\n\n\t// Create the sink\n\tsink := &FileSnapshotSink{\n\t\tstore:     f,\n\t\tlogger:    f.logger,\n\t\tdir:       path,\n\t\tparentDir: f.path,\n\t\tnoSync:    f.noSync,\n\t\tmeta: fileSnapshotMeta{\n\t\t\tSnapshotMeta: SnapshotMeta{\n\t\t\t\tVersion:            version,\n\t\t\t\tID:  
               name,\n\t\t\t\tIndex:              index,\n\t\t\t\tTerm:               term,\n\t\t\t\tPeers:              encodePeers(configuration, trans),\n\t\t\t\tConfiguration:      configuration,\n\t\t\t\tConfigurationIndex: configurationIndex,\n\t\t\t},\n\t\t\tCRC: nil,\n\t\t},\n\t}\n\n\t// Write out the meta data\n\tif err := sink.writeMeta(); err != nil {\n\t\tf.logger.Error(\"failed to write metadata\", \"error\", err)\n\t\treturn nil, err\n\t}\n\n\t// Open the state file\n\tstatePath := filepath.Join(path, stateFilePath)\n\tfh, err := os.Create(statePath)\n\tif err != nil {\n\t\tf.logger.Error(\"failed to create state file\", \"error\", err)\n\t\treturn nil, err\n\t}\n\tsink.stateFile = fh\n\n\t// Create a CRC64 hash\n\tsink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))\n\n\t// Wrap both the hash and file in a MultiWriter with buffering\n\tmulti := io.MultiWriter(sink.stateFile, sink.stateHash)\n\tsink.buffered = bufio.NewWriter(multi)\n\n\t// Done\n\treturn sink, nil\n}\n\n// List returns available snapshots in the store.\nfunc (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {\n\t// Get the eligible snapshots\n\tsnapshots, err := f.getSnapshots()\n\tif err != nil {\n\t\tf.logger.Error(\"failed to get snapshots\", \"error\", err)\n\t\treturn nil, err\n\t}\n\n\tvar snapMeta []*SnapshotMeta\n\tfor _, meta := range snapshots {\n\t\tsnapMeta = append(snapMeta, &meta.SnapshotMeta)\n\t\tif len(snapMeta) == f.retain {\n\t\t\tbreak\n\t\t}\n\t}\n\treturn snapMeta, nil\n}\n\n// getSnapshots returns all the known snapshots.\nfunc (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {\n\t// Get the eligible snapshots\n\tsnapshots, err := os.ReadDir(f.path)\n\tif err != nil {\n\t\tf.logger.Error(\"failed to scan snapshot directory\", \"error\", err)\n\t\treturn nil, err\n\t}\n\n\t// Populate the metadata\n\tvar snapMeta []*fileSnapshotMeta\n\tfor _, snap := range snapshots {\n\t\t// Ignore any files\n\t\tif !snap.IsDir() 
{\n\t\t\tcontinue\n\t\t}\n\n\t\t// Ignore any temporary snapshots\n\t\tdirName := snap.Name()\n\t\tif strings.HasSuffix(dirName, tmpSuffix) {\n\t\t\tf.logger.Warn(\"found temporary snapshot\", \"name\", dirName)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Try to read the meta data\n\t\tmeta, err := f.readMeta(dirName)\n\t\tif err != nil {\n\t\t\tf.logger.Warn(\"failed to read metadata\", \"name\", dirName, \"error\", err)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Make sure we can understand this version.\n\t\tif meta.Version < SnapshotVersionMin || meta.Version > SnapshotVersionMax {\n\t\t\tf.logger.Warn(\"snapshot version not supported\", \"name\", dirName, \"version\", meta.Version)\n\t\t\tcontinue\n\t\t}\n\n\t\t// Append, but only return up to the retain count\n\t\tsnapMeta = append(snapMeta, meta)\n\t}\n\n\t// Sort the snapshot, reverse so we get new -> old\n\tsort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))\n\n\treturn snapMeta, nil\n}\n\n// readMeta is used to read the meta data for a given named backup\nfunc (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {\n\t// Open the meta file\n\tmetaPath := filepath.Join(f.path, name, metaFilePath)\n\tfh, err := os.Open(metaPath)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\tdefer func() { _ = fh.Close() }()\n\n\t// Buffer the file IO\n\tbuffered := bufio.NewReader(fh)\n\n\t// Read in the JSON\n\tmeta := &fileSnapshotMeta{}\n\tdec := json.NewDecoder(buffered)\n\tif err := dec.Decode(meta); err != nil {\n\t\treturn nil, err\n\t}\n\treturn meta, nil\n}\n\n// Open takes a snapshot ID and returns a ReadCloser for that snapshot.\nfunc (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {\n\t// Get the metadata\n\tmeta, err := f.readMeta(id)\n\tif err != nil {\n\t\tf.logger.Error(\"failed to get meta data to open snapshot\", \"error\", err)\n\t\treturn nil, nil, err\n\t}\n\n\t// Open the state file\n\tstatePath := filepath.Join(f.path, id, stateFilePath)\n\tfh, err := os.Open(statePath)\n\tif 
err != nil {\n\t\tf.logger.Error(\"failed to open state file\", \"error\", err)\n\t\treturn nil, nil, err\n\t}\n\n\t// Create a CRC64 hash\n\tstateHash := crc64.New(crc64.MakeTable(crc64.ECMA))\n\n\t// Compute the hash\n\t_, err = io.Copy(stateHash, fh)\n\tif err != nil {\n\t\tf.logger.Error(\"failed to read state file\", \"error\", err)\n\t\t_ = fh.Close()\n\t\treturn nil, nil, err\n\t}\n\n\t// Verify the hash\n\tcomputed := stateHash.Sum(nil)\n\tif !bytes.Equal(meta.CRC, computed) {\n\t\tf.logger.Error(\"CRC checksum failed\", \"stored\", meta.CRC, \"computed\", computed)\n\t\t_ = fh.Close()\n\t\treturn nil, nil, fmt.Errorf(\"CRC mismatch\")\n\t}\n\n\t// Seek to the start\n\tif _, err := fh.Seek(0, 0); err != nil {\n\t\tf.logger.Error(\"state file seek failed\", \"error\", err)\n\t\t_ = fh.Close()\n\t\treturn nil, nil, err\n\t}\n\n\t// Return a buffered file\n\tbuffered := &bufferedFile{\n\t\tbh: bufio.NewReader(fh),\n\t\tfh: fh,\n\t}\n\n\treturn &meta.SnapshotMeta, buffered, nil\n}\n\n// ReapSnapshots reaps any snapshots beyond the retain count.\nfunc (f *FileSnapshotStore) ReapSnapshots() error {\n\tsnapshots, err := f.getSnapshots()\n\tif err != nil {\n\t\tf.logger.Error(\"failed to get snapshots\", \"error\", err)\n\t\treturn err\n\t}\n\n\tfor i := f.retain; i < len(snapshots); i++ {\n\t\tpath := filepath.Join(f.path, snapshots[i].ID)\n\t\tf.logger.Info(\"reaping snapshot\", \"path\", path)\n\t\tif err := os.RemoveAll(path); err != nil {\n\t\t\tf.logger.Error(\"failed to reap snapshot\", \"path\", path, \"error\", err)\n\t\t\treturn err\n\t\t}\n\t}\n\treturn nil\n}\n\n// ID returns the ID of the snapshot, can be used with Open()\n// after the snapshot is finalized.\nfunc (s *FileSnapshotSink) ID() string {\n\treturn s.meta.ID\n}\n\n// Write is used to append to the state file. 
We write to the\n// buffered IO object to reduce the amount of context switches.\nfunc (s *FileSnapshotSink) Write(b []byte) (int, error) {\n\treturn s.buffered.Write(b)\n}\n\n// Close is used to indicate a successful end.\nfunc (s *FileSnapshotSink) Close() error {\n\t// Make sure close is idempotent\n\tif s.closed {\n\t\treturn nil\n\t}\n\ts.closed = true\n\n\t// Close the open handles\n\tif err := s.finalize(); err != nil {\n\t\ts.logger.Error(\"failed to finalize snapshot\", \"error\", err)\n\t\tif delErr := os.RemoveAll(s.dir); delErr != nil {\n\t\t\ts.logger.Error(\"failed to delete temporary snapshot directory\", \"path\", s.dir, \"error\", delErr)\n\t\t\treturn delErr\n\t\t}\n\t\treturn err\n\t}\n\n\t// Write out the meta data\n\tif err := s.writeMeta(); err != nil {\n\t\ts.logger.Error(\"failed to write metadata\", \"error\", err)\n\t\treturn err\n\t}\n\n\t// Move the directory into place\n\tnewPath := strings.TrimSuffix(s.dir, tmpSuffix)\n\tif err := os.Rename(s.dir, newPath); err != nil {\n\t\ts.logger.Error(\"failed to move snapshot into place\", \"error\", err)\n\t\treturn err\n\t}\n\n\tif !s.noSync && runtime.GOOS != \"windows\" { // skipping fsync for directory entry edits on Windows, only needed for *nix style file systems\n\t\tparentFH, err := os.Open(s.parentDir)\n\t\tif err != nil {\n\t\t\ts.logger.Error(\"failed to open snapshot parent directory\", \"path\", s.parentDir, \"error\", err)\n\t\t\treturn err\n\t\t}\n\t\tdefer func() { _ = parentFH.Close() }()\n\n\t\tif err = parentFH.Sync(); err != nil {\n\t\t\ts.logger.Error(\"failed syncing parent directory\", \"path\", s.parentDir, \"error\", err)\n\t\t\treturn err\n\t\t}\n\t}\n\n\t// Reap any old snapshots\n\tif err := s.store.ReapSnapshots(); err != nil {\n\t\treturn err\n\t}\n\n\treturn nil\n}\n\n// Cancel is used to indicate an unsuccessful end.\nfunc (s *FileSnapshotSink) Cancel() error {\n\t// Make sure close is idempotent\n\tif s.closed {\n\t\treturn nil\n\t}\n\ts.closed = true\n\n\t// 
Close the open handles\n\tif err := s.finalize(); err != nil {\n\t\ts.logger.Error(\"failed to finalize snapshot\", \"error\", err)\n\t\treturn err\n\t}\n\n\t// Attempt to remove all artifacts\n\treturn os.RemoveAll(s.dir)\n}\n\n// finalize is used to close all of our resources.\nfunc (s *FileSnapshotSink) finalize() error {\n\t// Flush any remaining data\n\tif err := s.buffered.Flush(); err != nil {\n\t\treturn err\n\t}\n\n\t// Sync to force fsync to disk\n\tif !s.noSync {\n\t\tif err := s.stateFile.Sync(); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\t// Get the file size\n\tstat, statErr := s.stateFile.Stat()\n\n\t// Close the file\n\tif err := s.stateFile.Close(); err != nil {\n\t\treturn err\n\t}\n\n\t// Set the file size, check after we close\n\tif statErr != nil {\n\t\treturn statErr\n\t}\n\ts.meta.Size = stat.Size()\n\n\t// Set the CRC\n\ts.meta.CRC = s.stateHash.Sum(nil)\n\treturn nil\n}\n\n// writeMeta is used to write out the metadata we have.\nfunc (s *FileSnapshotSink) writeMeta() error {\n\tvar err error\n\t// Open the meta file\n\tmetaPath := filepath.Join(s.dir, metaFilePath)\n\tvar fh *os.File\n\tfh, err = os.Create(metaPath)\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer func() { _ = fh.Close() }()\n\n\t// Buffer the file IO\n\tbuffered := bufio.NewWriter(fh)\n\n\t// Write out as JSON\n\tenc := json.NewEncoder(buffered)\n\tif err = enc.Encode(&s.meta); err != nil {\n\t\treturn err\n\t}\n\n\tif err = buffered.Flush(); err != nil {\n\t\treturn err\n\t}\n\n\tif !s.noSync {\n\t\tif err = fh.Sync(); err != nil {\n\t\t\treturn err\n\t\t}\n\t}\n\n\treturn nil\n}\n\n// Implement the sort interface for []*fileSnapshotMeta.\nfunc (s snapMetaSlice) Len() int {\n\treturn len(s)\n}\n\nfunc (s snapMetaSlice) Less(i, j int) bool {\n\tif s[i].Term != s[j].Term {\n\t\treturn s[i].Term < s[j].Term\n\t}\n\tif s[i].Index != s[j].Index {\n\t\treturn s[i].Index < s[j].Index\n\t}\n\treturn s[i].ID < s[j].ID\n}\n\nfunc (s snapMetaSlice) Swap(i, j int) {\n\ts[i], s[j] 
= s[j], s[i]\n}\n"
  },
  {
    "path": "file_snapshot_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"io\"\n\t\"os\"\n\t\"reflect\"\n\t\"runtime\"\n\t\"testing\"\n)\n\nfunc TestFileSnapshotStoreImpl(t *testing.T) {\n\tvar impl interface{} = &FileSnapshotStore{}\n\tif _, ok := impl.(SnapshotStore); !ok {\n\t\tt.Fatalf(\"FileSnapshotStore not a SnapshotStore\")\n\t}\n}\n\nfunc TestFileSnapshotSinkImpl(t *testing.T) {\n\tvar impl interface{} = &FileSnapshotSink{}\n\tif _, ok := impl.(SnapshotSink); !ok {\n\t\tt.Fatalf(\"FileSnapshotSink not a SnapshotSink\")\n\t}\n}\n\nfunc TestFileSS_CreateSnapshotMissingParentDir(t *testing.T) {\n\tparent, err := os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(parent) }()\n\n\tdir, err := os.MkdirTemp(parent, \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\n\tsnap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t_ = os.RemoveAll(parent)\n\t_, trans := NewInmemTransport(NewInmemAddr())\n\t_, err = snap.Create(SnapshotVersionMax, 10, 3, Configuration{}, 0, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"should not fail when using non existing parent\")\n\t}\n}\n\nfunc TestFileSS_CreateSnapshot(t *testing.T) {\n\t// Create a test dir\n\tdir, err := os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(dir) }()\n\n\tsnap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Check no snapshots\n\tsnaps, err := snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 0 {\n\t\tt.Fatalf(\"did not expect any snapshots: %v\", snaps)\n\t}\n\n\t// Create a new sink\n\tvar configuration Configuration\n\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\tSuffrage: 
Voter,\n\t\tID:       ServerID(\"my id\"),\n\t\tAddress:  ServerAddress(\"over here\"),\n\t})\n\t_, trans := NewInmemTransport(NewInmemAddr())\n\tsink, err := snap.Create(SnapshotVersionMax, 10, 3, configuration, 2, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// The sink is not done, should not be in a list!\n\tsnaps, err = snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 0 {\n\t\tt.Fatalf(\"did not expect any snapshots: %v\", snaps)\n\t}\n\n\t// Write to the sink\n\t_, err = sink.Write([]byte(\"first\\n\"))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\t_, err = sink.Write([]byte(\"second\\n\"))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Done!\n\terr = sink.Close()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Should have a snapshot!\n\tsnaps, err = snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 1 {\n\t\tt.Fatalf(\"expect a snapshots: %v\", snaps)\n\t}\n\n\t// Check the latest\n\tlatest := snaps[0]\n\tif latest.Index != 10 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif latest.Term != 3 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif !reflect.DeepEqual(latest.Configuration, configuration) {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif latest.ConfigurationIndex != 2 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif latest.Size != 13 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\n\t// Read the snapshot\n\t_, r, err := snap.Open(latest.ID)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Read out everything\n\tvar buf bytes.Buffer\n\tif _, err := io.Copy(&buf, r); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif err := r.Close(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure a match\n\tif !bytes.Equal(buf.Bytes(), []byte(\"first\\nsecond\\n\")) {\n\t\tt.Fatalf(\"content mismatch\")\n\t}\n}\n\nfunc TestFileSS_CancelSnapshot(t 
*testing.T) {\n\t// Create a test dir\n\tdir, err := os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(dir) }()\n\n\tsnap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Create a new sink\n\t_, trans := NewInmemTransport(NewInmemAddr())\n\tsink, err := snap.Create(SnapshotVersionMax, 10, 3, Configuration{}, 0, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Cancel the snapshot! Should delete\n\terr = sink.Cancel()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// The sink is canceled, should not be in a list!\n\tsnaps, err := snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 0 {\n\t\tt.Fatalf(\"did not expect any snapshots: %v\", snaps)\n\t}\n}\n\nfunc TestFileSS_Retention(t *testing.T) {\n\tvar err error\n\t// Create a test dir\n\tvar dir string\n\tdir, err = os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(dir) }()\n\n\tvar snap *FileSnapshotStore\n\tsnap, err = NewFileSnapshotStoreWithLogger(dir, 2, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Create a few snapshots\n\t_, trans := NewInmemTransport(NewInmemAddr())\n\tfor i := 10; i < 15; i++ {\n\t\tvar sink SnapshotSink\n\t\tsink, err = snap.Create(SnapshotVersionMax, uint64(i), 3, Configuration{}, 0, trans)\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\terr = sink.Close()\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t}\n\n\t// Should only have 2 listed!\n\tvar snaps []*SnapshotMeta\n\tsnaps, err = snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 2 {\n\t\tt.Fatalf(\"expect 2 snapshots: %v\", snaps)\n\t}\n\n\t// Check they are the latest\n\tif snaps[0].Index != 14 {\n\t\tt.Fatalf(\"bad snap: %#v\", 
*snaps[0])\n\t}\n\tif snaps[1].Index != 13 {\n\t\tt.Fatalf(\"bad snap: %#v\", *snaps[1])\n\t}\n}\n\nfunc TestFileSS_BadPerm(t *testing.T) {\n\tvar err error\n\tif runtime.GOOS == \"windows\" {\n\t\tt.Skip(\"skipping file permission test on windows\")\n\t}\n\n\t// Create a temp dir\n\tvar dir1 string\n\tdir1, err = os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %s\", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(dir1) }()\n\n\t// Create a sub dir and remove all permissions\n\tvar dir2 string\n\tdir2, err = os.MkdirTemp(dir1, \"badperm\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %s\", err)\n\t}\n\tif err = os.Chmod(dir2, 0o00); err != nil {\n\t\tt.Fatalf(\"err: %s\", err)\n\t}\n\tdefer func() { _ = os.Chmod(dir2, 0777) }() // Set perms back for delete\n\n\t// Should fail\n\tif _, err = NewFileSnapshotStore(dir2, 3, nil); err == nil {\n\t\tt.Fatalf(\"should fail to use dir with bad perms\")\n\t}\n}\n\nfunc TestFileSS_MissingParentDir(t *testing.T) {\n\tparent, err := os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(parent) }()\n\n\tdir, err := os.MkdirTemp(parent, \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\n\t_ = os.RemoveAll(parent)\n\t_, err = NewFileSnapshotStore(dir, 3, nil)\n\tif err != nil {\n\t\tt.Fatalf(\"should not fail when using non existing parent\")\n\t}\n}\n\nfunc TestFileSS_Ordering(t *testing.T) {\n\t// Create a test dir\n\tdir, err := os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(dir) }()\n\n\tsnap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Create a new sink\n\t_, trans := NewInmemTransport(NewInmemAddr())\n\tsink, err := snap.Create(SnapshotVersionMax, 130350, 5, Configuration{}, 0, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\terr = sink.Close()\n\tif err 
!= nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\tsink, err = snap.Create(SnapshotVersionMax, 204917, 36, Configuration{}, 0, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\terr = sink.Close()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Should only have 2 listed!\n\tsnaps, err := snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 2 {\n\t\tt.Fatalf(\"expect 2 snapshots: %v\", snaps)\n\t}\n\n\t// Check they are ordered\n\tif snaps[0].Term != 36 {\n\t\tt.Fatalf(\"bad snap: %#v\", *snaps[0])\n\t}\n\tif snaps[1].Term != 5 {\n\t\tt.Fatalf(\"bad snap: %#v\", *snaps[1])\n\t}\n}\n"
  },
  {
    "path": "fsm.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"io\"\n\t\"time\"\n\n\thclog \"github.com/hashicorp/go-hclog\"\n\t\"github.com/hashicorp/go-metrics/compat\"\n)\n\n// FSM is implemented by clients to make use of the replicated log.\ntype FSM interface {\n\t// Apply is called once a log entry is committed by a majority of the cluster.\n\t//\n\t// Apply should apply the log to the FSM. Apply must be deterministic and\n\t// produce the same result on all peers in the cluster.\n\t//\n\t// The returned value is returned to the client as the ApplyFuture.Response.\n\tApply(*Log) interface{}\n\n\t// Snapshot returns an FSMSnapshot used to: support log compaction, to\n\t// restore the FSM to a previous state, or to bring out-of-date followers up\n\t// to a recent log index.\n\t//\n\t// The Snapshot implementation should return quickly, because Apply can not\n\t// be called while Snapshot is running. Generally this means Snapshot should\n\t// only capture a pointer to the state, and any expensive IO should happen\n\t// as part of FSMSnapshot.Persist.\n\t//\n\t// Apply and Snapshot are always called from the same thread, but Apply will\n\t// be called concurrently with FSMSnapshot.Persist. This means the FSM should\n\t// be implemented to allow for concurrent updates while a snapshot is happening.\n\t//\n\t// Clients of this library should make no assumptions about whether a returned\n\t// Snapshot() will actually be stored by Raft. In fact it's quite possible that\n\t// any Snapshot returned by this call will be discarded, and that\n\t// FSMSnapshot.Persist will never be called. Raft will always call\n\t// FSMSnapshot.Release however.\n\tSnapshot() (FSMSnapshot, error)\n\n\t// Restore is used to restore an FSM from a snapshot. It is not called\n\t// concurrently with any other command. 
The FSM must discard all previous\n\t// state before restoring the snapshot.\n\tRestore(snapshot io.ReadCloser) error\n}\n\n// BatchingFSM extends the FSM interface to add an ApplyBatch function. This can\n// optionally be implemented by clients to enable multiple logs to be applied to\n// the FSM in batches. Up to MaxAppendEntries could be sent in a batch.\ntype BatchingFSM interface {\n\t// ApplyBatch is invoked once a batch of log entries has been committed and\n\t// is ready to be applied to the FSM. ApplyBatch will take in an array of\n\t// log entries. These log entries will be in the order they were committed,\n\t// will not have gaps, and could be of a few log types. Clients should check\n\t// the log type prior to attempting to decode the data attached. Presently\n\t// the LogCommand and LogConfiguration types will be sent.\n\t//\n\t// The returned slice must be the same length as the input and each response\n\t// should correlate to the log at the same index of the input. The returned\n\t// values will be made available in the ApplyFuture returned by Raft.Apply\n\t// method if that method was called on the same Raft node as the FSM.\n\tApplyBatch([]*Log) []interface{}\n\n\tFSM\n}\n\n// FSMSnapshot is returned by an FSM in response to a Snapshot call.\n// It must be safe to invoke FSMSnapshot methods with concurrent\n// calls to Apply.\ntype FSMSnapshot interface {\n\t// Persist should dump all necessary state to the WriteCloser 'sink',\n\t// and call sink.Close() when finished or call sink.Cancel() on error.\n\tPersist(sink SnapshotSink) error\n\n\t// Release is invoked when we are finished with the snapshot.\n\tRelease()\n}\n\n// runFSM is a long running goroutine responsible for applying logs\n// to the FSM. 
This is done async of other logs since we don't want\n// the FSM to block our internal operations.\nfunc (r *Raft) runFSM() {\n\tvar lastIndex, lastTerm uint64\n\n\tbatchingFSM, batchingEnabled := r.fsm.(BatchingFSM)\n\tconfigStore, configStoreEnabled := r.fsm.(ConfigurationStore)\n\n\tapplySingle := func(req *commitTuple) {\n\t\t// Apply the log if a command or config change\n\t\tvar resp interface{}\n\t\t// Make sure we send a response\n\t\tdefer func() {\n\t\t\t// Invoke the future if given\n\t\t\tif req.future != nil {\n\t\t\t\treq.future.response = resp\n\t\t\t\treq.future.respond(nil)\n\t\t\t}\n\t\t}()\n\n\t\tswitch req.log.Type {\n\t\tcase LogCommand:\n\t\t\tstart := time.Now()\n\t\t\tresp = r.fsm.Apply(req.log)\n\t\t\tmetrics.MeasureSince([]string{\"raft\", \"fsm\", \"apply\"}, start)\n\n\t\tcase LogConfiguration:\n\t\t\tif !configStoreEnabled {\n\t\t\t\t// Return early to avoid incrementing the index and term for\n\t\t\t\t// an unimplemented operation.\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\tstart := time.Now()\n\t\t\tconfigStore.StoreConfiguration(req.log.Index, DecodeConfiguration(req.log.Data))\n\t\t\tmetrics.MeasureSince([]string{\"raft\", \"fsm\", \"store_config\"}, start)\n\t\t}\n\n\t\t// Update the indexes\n\t\tlastIndex = req.log.Index\n\t\tlastTerm = req.log.Term\n\t}\n\n\tapplyBatch := func(reqs []*commitTuple) {\n\t\tif !batchingEnabled {\n\t\t\tfor _, ct := range reqs {\n\t\t\t\tapplySingle(ct)\n\t\t\t}\n\t\t\treturn\n\t\t}\n\n\t\t// Only send LogCommand and LogConfiguration log types. 
LogBarrier types\n\t\t// will not be sent to the FSM.\n\t\tshouldSend := func(l *Log) bool {\n\t\t\tswitch l.Type {\n\t\t\tcase LogCommand, LogConfiguration:\n\t\t\t\treturn true\n\t\t\t}\n\t\t\treturn false\n\t\t}\n\n\t\tvar lastBatchIndex, lastBatchTerm uint64\n\t\tsendLogs := make([]*Log, 0, len(reqs))\n\t\tfor _, req := range reqs {\n\t\t\tif shouldSend(req.log) {\n\t\t\t\tsendLogs = append(sendLogs, req.log)\n\t\t\t}\n\t\t\tlastBatchIndex = req.log.Index\n\t\t\tlastBatchTerm = req.log.Term\n\t\t}\n\n\t\tvar responses []interface{}\n\t\tif len(sendLogs) > 0 {\n\t\t\tstart := time.Now()\n\t\t\tresponses = batchingFSM.ApplyBatch(sendLogs)\n\t\t\tmetrics.MeasureSince([]string{\"raft\", \"fsm\", \"applyBatch\"}, start)\n\t\t\tmetrics.AddSample([]string{\"raft\", \"fsm\", \"applyBatchNum\"}, float32(len(reqs)))\n\n\t\t\t// Ensure we get the expected responses\n\t\t\tif len(sendLogs) != len(responses) {\n\t\t\t\tpanic(\"invalid number of responses\")\n\t\t\t}\n\t\t}\n\n\t\t// Update the indexes\n\t\tlastIndex = lastBatchIndex\n\t\tlastTerm = lastBatchTerm\n\n\t\tvar i int\n\t\tfor _, req := range reqs {\n\t\t\tvar resp interface{}\n\t\t\t// If the log was sent to the FSM, retrieve the response.\n\t\t\tif shouldSend(req.log) {\n\t\t\t\tresp = responses[i]\n\t\t\t\ti++\n\t\t\t}\n\n\t\t\tif req.future != nil {\n\t\t\t\treq.future.response = resp\n\t\t\t\treq.future.respond(nil)\n\t\t\t}\n\t\t}\n\t}\n\n\trestore := func(req *restoreFuture) {\n\t\t// Open the snapshot\n\t\tmeta, source, err := r.snapshots.Open(req.ID)\n\t\tif err != nil {\n\t\t\treq.respond(fmt.Errorf(\"failed to open snapshot %v: %v\", req.ID, err))\n\t\t\treturn\n\t\t}\n\t\tdefer func() { _ = source.Close() }()\n\n\t\tsnapLogger := r.logger.With(\n\t\t\t\"id\", req.ID,\n\t\t\t\"last-index\", meta.Index,\n\t\t\t\"last-term\", meta.Term,\n\t\t\t\"size-in-bytes\", meta.Size,\n\t\t)\n\n\t\t// Attempt to restore\n\t\tif err := fsmRestoreAndMeasure(snapLogger, r.fsm, source, meta.Size); err != nil 
{\n\t\t\treq.respond(fmt.Errorf(\"failed to restore snapshot %v: %v\", req.ID, err))\n\t\t\treturn\n\t\t}\n\n\t\t// Update the last index and term\n\t\tlastIndex = meta.Index\n\t\tlastTerm = meta.Term\n\t\treq.respond(nil)\n\t}\n\n\tsnapshot := func(req *reqSnapshotFuture) {\n\t\t// Is there something to snapshot?\n\t\tif lastIndex == 0 {\n\t\t\treq.respond(ErrNothingNewToSnapshot)\n\t\t\treturn\n\t\t}\n\n\t\t// Start a snapshot\n\t\tstart := time.Now()\n\t\tsnap, err := r.fsm.Snapshot()\n\t\tmetrics.MeasureSince([]string{\"raft\", \"fsm\", \"snapshot\"}, start)\n\n\t\t// Respond to the request\n\t\treq.index = lastIndex\n\t\treq.term = lastTerm\n\t\treq.snapshot = snap\n\t\treq.respond(err)\n\t}\n\n\tsaturation := newSaturationMetric([]string{\"raft\", \"thread\", \"fsm\", \"saturation\"}, 1*time.Second)\n\n\tfor {\n\t\tsaturation.sleeping()\n\n\t\tselect {\n\t\tcase ptr := <-r.fsmMutateCh:\n\t\t\tsaturation.working()\n\n\t\t\tswitch req := ptr.(type) {\n\t\t\tcase []*commitTuple:\n\t\t\t\tapplyBatch(req)\n\n\t\t\tcase *restoreFuture:\n\t\t\t\trestore(req)\n\n\t\t\tdefault:\n\t\t\t\tpanic(fmt.Errorf(\"bad type passed to fsmMutateCh: %#v\", ptr))\n\t\t\t}\n\n\t\tcase req := <-r.fsmSnapshotCh:\n\t\t\tsaturation.working()\n\n\t\t\tsnapshot(req)\n\n\t\tcase <-r.shutdownCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// fsmRestoreAndMeasure wraps the Restore call on an FSM to consistently measure\n// and report timing metrics. 
The caller is still responsible for calling Close\n// on the source in all cases.\nfunc fsmRestoreAndMeasure(logger hclog.Logger, fsm FSM, source io.ReadCloser, snapshotSize int64) error {\n\tstart := time.Now()\n\n\tcrc := newCountingReadCloser(source)\n\n\tmonitor := startSnapshotRestoreMonitor(logger, crc, snapshotSize, false)\n\tdefer monitor.StopAndWait()\n\n\tif err := fsm.Restore(crc); err != nil {\n\t\treturn err\n\t}\n\tmetrics.MeasureSince([]string{\"raft\", \"fsm\", \"restore\"}, start)\n\tmetrics.SetGauge([]string{\"raft\", \"fsm\", \"lastRestoreDuration\"},\n\t\tfloat32(time.Since(start).Milliseconds()))\n\n\treturn nil\n}\n"
  },
  {
    "path": "future.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"io\"\n\t\"sync\"\n\t\"time\"\n)\n\n// Future is used to represent an action that may occur in the future.\ntype Future interface {\n\t// Error blocks until the future arrives and then returns the error status\n\t// of the future. This may be called any number of times - all calls will\n\t// return the same value; however, it is not OK to call this method twice\n\t// concurrently on the same Future instance.\n\t// Error will only return generic errors related to raft, such\n\t// as ErrLeadershipLost, or ErrRaftShutdown. Some operations, such as\n\t// ApplyLog, may also return errors from other methods.\n\tError() error\n}\n\n// IndexFuture is used for future actions that can result in a raft log entry\n// being created.\ntype IndexFuture interface {\n\tFuture\n\n\t// Index holds the index of the newly applied log entry.\n\t// This must not be called until after the Error method has returned.\n\tIndex() uint64\n}\n\n// ApplyFuture is used for Apply and can return the FSM response.\ntype ApplyFuture interface {\n\tIndexFuture\n\n\t// Response returns the FSM response as returned by the FSM.Apply method. This\n\t// must not be called until after the Error method has returned.\n\t// Note that if FSM.Apply returns an error, it will be returned by Response,\n\t// and not by the Error method, so it is always important to check Response\n\t// for errors from the FSM.\n\tResponse() interface{}\n}\n\n// ConfigurationFuture is used for GetConfiguration and can return the\n// latest configuration in use by Raft.\ntype ConfigurationFuture interface {\n\tIndexFuture\n\n\t// Configuration contains the latest configuration. 
This must\n\t// not be called until after the Error method has returned.\n\tConfiguration() Configuration\n}\n\n// SnapshotFuture is used for waiting on a user-triggered snapshot to complete.\ntype SnapshotFuture interface {\n\tFuture\n\n\t// Open is a function you can call to access the underlying snapshot and\n\t// its metadata. This must not be called until after the Error method\n\t// has returned.\n\tOpen() (*SnapshotMeta, io.ReadCloser, error)\n}\n\n// LeadershipTransferFuture is used for waiting on a user-triggered leadership\n// transfer to complete.\ntype LeadershipTransferFuture interface {\n\tFuture\n}\n\n// errorFuture is used to return a static error.\ntype errorFuture struct {\n\terr error\n}\n\nfunc (e errorFuture) Error() error {\n\treturn e.err\n}\n\nfunc (e errorFuture) Response() interface{} {\n\treturn nil\n}\n\nfunc (e errorFuture) Index() uint64 {\n\treturn 0\n}\n\n// deferError can be embedded to allow a future\n// to provide an error in the future.\ntype deferError struct {\n\terr        error\n\terrCh      chan error\n\tresponded  bool\n\tShutdownCh chan struct{}\n}\n\nfunc (d *deferError) init() {\n\td.errCh = make(chan error, 1)\n}\n\nfunc (d *deferError) Error() error {\n\tif d.err != nil {\n\t\t// Note that when we've received a nil error, this\n\t\t// won't trigger, but the channel is closed after\n\t\t// send so we'll still return nil below.\n\t\treturn d.err\n\t}\n\tif d.errCh == nil {\n\t\tpanic(\"waiting for response on nil channel\")\n\t}\n\tselect {\n\tcase d.err = <-d.errCh:\n\tcase <-d.ShutdownCh:\n\t\td.err = ErrRaftShutdown\n\t}\n\treturn d.err\n}\n\nfunc (d *deferError) respond(err error) {\n\tif d.errCh == nil {\n\t\treturn\n\t}\n\tif d.responded {\n\t\treturn\n\t}\n\td.errCh <- err\n\tclose(d.errCh)\n\td.responded = true\n}\n\n// There are several types of requests that cause a configuration entry to\n// be appended to the log. 
These are encoded here for leaderLoop() to process.\n// This is internal to a single server.\ntype configurationChangeFuture struct {\n\tlogFuture\n\treq configurationChangeRequest\n}\n\n// bootstrapFuture is used to attempt a live bootstrap of the cluster. See the\n// Raft object's BootstrapCluster member function for more details.\ntype bootstrapFuture struct {\n\tdeferError\n\n\t// configuration is the proposed bootstrap configuration to apply.\n\tconfiguration Configuration\n}\n\n// logFuture is used to apply a log entry and waits until\n// the log is considered committed.\ntype logFuture struct {\n\tdeferError\n\tlog      Log\n\tresponse interface{}\n\tdispatch time.Time\n}\n\nfunc (l *logFuture) Response() interface{} {\n\treturn l.response\n}\n\nfunc (l *logFuture) Index() uint64 {\n\treturn l.log.Index\n}\n\ntype shutdownFuture struct {\n\traft *Raft\n}\n\nfunc (s *shutdownFuture) Error() error {\n\tif s.raft == nil {\n\t\treturn nil\n\t}\n\ts.raft.waitShutdown()\n\tif closeable, ok := s.raft.trans.(WithClose); ok {\n\t\t_ = closeable.Close()\n\t}\n\treturn nil\n}\n\n// userSnapshotFuture is used for waiting on a user-triggered snapshot to\n// complete.\ntype userSnapshotFuture struct {\n\tdeferError\n\n\t// opener is a function used to open the snapshot. 
This is filled in\n\t// once the future returns with no error.\n\topener func() (*SnapshotMeta, io.ReadCloser, error)\n}\n\n// Open is a function you can call to access the underlying snapshot and its\n// metadata.\nfunc (u *userSnapshotFuture) Open() (*SnapshotMeta, io.ReadCloser, error) {\n\tif u.opener == nil {\n\t\treturn nil, nil, fmt.Errorf(\"no snapshot available\")\n\t}\n\t// Invalidate the opener so it can't get called multiple times,\n\t// which isn't generally safe.\n\tdefer func() {\n\t\tu.opener = nil\n\t}()\n\treturn u.opener()\n}\n\n// userRestoreFuture is used for waiting on a user-triggered restore of an\n// external snapshot to complete.\ntype userRestoreFuture struct {\n\tdeferError\n\n\t// meta is the metadata that belongs with the snapshot.\n\tmeta *SnapshotMeta\n\n\t// reader is the interface to read the snapshot contents from.\n\treader io.Reader\n}\n\n// reqSnapshotFuture is used for requesting a snapshot start.\n// It is only used internally.\ntype reqSnapshotFuture struct {\n\tdeferError\n\n\t// snapshot details provided by the FSM runner before responding\n\tindex    uint64\n\tterm     uint64\n\tsnapshot FSMSnapshot\n}\n\n// restoreFuture is used for requesting an FSM to perform a\n// snapshot restore. Used internally only.\ntype restoreFuture struct {\n\tdeferError\n\tID string\n}\n\n// verifyFuture is used to verify the current node is still\n// the leader. This is to prevent a stale read.\ntype verifyFuture struct {\n\tdeferError\n\tnotifyCh   chan *verifyFuture\n\tquorumSize int\n\tvotes      int\n\tvoteLock   sync.Mutex\n}\n\n// leadershipTransferFuture is used to track the progress of a leadership\n// transfer internally.\ntype leadershipTransferFuture struct {\n\tdeferError\n\n\tID      *ServerID\n\tAddress *ServerAddress\n}\n\n// configurationsFuture is used to retrieve the current configurations. 
This is\n// used to allow safe access to this information outside of the main thread.\ntype configurationsFuture struct {\n\tdeferError\n\tconfigurations configurations\n}\n\n// Configuration returns the latest configuration in use by Raft.\nfunc (c *configurationsFuture) Configuration() Configuration {\n\treturn c.configurations.latest\n}\n\n// Index returns the index of the latest configuration in use by Raft.\nfunc (c *configurationsFuture) Index() uint64 {\n\treturn c.configurations.latestIndex\n}\n\n// vote is used to respond to a verifyFuture.\n// This may block when responding on the notifyCh.\nfunc (v *verifyFuture) vote(leader bool) {\n\tv.voteLock.Lock()\n\tdefer v.voteLock.Unlock()\n\n\t// Guard against having notified already\n\tif v.notifyCh == nil {\n\t\treturn\n\t}\n\n\tif leader {\n\t\tv.votes++\n\t\tif v.votes >= v.quorumSize {\n\t\t\tv.notifyCh <- v\n\t\t\tv.notifyCh = nil\n\t\t}\n\t} else {\n\t\tv.notifyCh <- v\n\t\tv.notifyCh = nil\n\t}\n}\n\n// appendFuture is used for waiting on a pipelined append\n// entries RPC.\ntype appendFuture struct {\n\tdeferError\n\tstart time.Time\n\targs  *AppendEntriesRequest\n\tresp  *AppendEntriesResponse\n}\n\nfunc (a *appendFuture) Start() time.Time {\n\treturn a.start\n}\n\nfunc (a *appendFuture) Request() *AppendEntriesRequest {\n\treturn a.args\n}\n\nfunc (a *appendFuture) Response() *AppendEntriesResponse {\n\treturn a.resp\n}\n"
  },
  {
    "path": "future_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"errors\"\n\t\"testing\"\n)\n\nfunc TestDeferFutureSuccess(t *testing.T) {\n\tvar f deferError\n\tf.init()\n\tf.respond(nil)\n\tif err := f.Error(); err != nil {\n\t\tt.Fatalf(\"unexpected error result; got %#v want nil\", err)\n\t}\n\tif err := f.Error(); err != nil {\n\t\tt.Fatalf(\"unexpected error result; got %#v want nil\", err)\n\t}\n}\n\nfunc TestDeferFutureError(t *testing.T) {\n\twant := errors.New(\"x\")\n\tvar f deferError\n\tf.init()\n\tf.respond(want)\n\tif got := f.Error(); got != want {\n\t\tt.Fatalf(\"unexpected error result; got %#v want %#v\", got, want)\n\t}\n\tif got := f.Error(); got != want {\n\t\tt.Fatalf(\"unexpected error result; got %#v want %#v\", got, want)\n\t}\n}\n\nfunc TestDeferFutureConcurrent(t *testing.T) {\n\t// Food for the race detector.\n\twant := errors.New(\"x\")\n\tvar f deferError\n\tf.init()\n\tgo f.respond(want)\n\tif got := f.Error(); got != want {\n\t\tt.Errorf(\"unexpected error result; got %#v want %#v\", got, want)\n\t}\n}\n"
  },
  {
    "path": "fuzzy/apply_src.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"hash/fnv\"\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n)\n\ntype applySource struct {\n\trnd  *rand.Rand\n\tseed int64\n}\n\n// newApplySource creates a new source. Any source created with the same seed will generate the same sequence of data.\nfunc newApplySource(seed string) *applySource {\n\th := fnv.New32()\n\th.Write([]byte(seed))\n\ts := &applySource{seed: int64(h.Sum32())}\n\ts.reset()\n\treturn s\n}\n\n// reset returns this source to its initial state, so it will generate the same sequence of data it initially did.\nfunc (a *applySource) reset() {\n\ta.rnd = rand.New(rand.NewSource(a.seed))\n}\n\nfunc (a *applySource) nextEntry() []byte {\n\tconst sz = 33\n\tr := make([]byte, sz)\n\tfor i := 0; i < len(r); i++ {\n\t\tr[i] = byte(a.rnd.Int31n(256))\n\t}\n\treturn r\n}\n\ntype clusterApplier struct {\n\tstopCh  chan bool\n\tapplied uint64\n\tsrc     *applySource\n}\n\n// apply runs applies against the cluster in chunks of n in a background goroutine; call stop() on the returned clusterApplier to halt it.\nfunc (a *applySource) apply(t *testing.T, c *cluster, n uint) *clusterApplier {\n\tap := &clusterApplier{stopCh: make(chan bool), src: a}\n\tgo ap.apply(t, c, n)\n\treturn ap\n}\n\nfunc (ca *clusterApplier) apply(t *testing.T, c *cluster, n uint) {\n\tfor true {\n\t\tselect {\n\t\tcase <-ca.stopCh:\n\t\t\treturn\n\t\tdefault:\n\t\t\tca.applied += c.ApplyN(t, 5*time.Second, ca.src, n)\n\t\t}\n\t}\n}\n\nfunc (ca *clusterApplier) stop() {\n\tca.stopCh <- true\n\tclose(ca.stopCh)\n}\n"
  },
  {
    "path": "fuzzy/cluster.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\n\t\"github.com/hashicorp/raft\"\n)\n\ntype appliedItem struct {\n\tindex uint64\n\tdata  []byte\n}\n\ntype cluster struct {\n\tnodes            []*raftNode\n\tremovedNodes     []*raftNode\n\tlastApplySuccess raft.ApplyFuture\n\tlastApplyFailure raft.ApplyFuture\n\tapplied          []appliedItem\n\tlog              Logger\n\ttransports       *transports\n\thooks            TransportHooks\n}\n\n// Logger is abstract type for debug log messages\ntype Logger interface {\n\tLog(v ...interface{})\n\tLogf(s string, v ...interface{})\n}\n\n// LoggerAdapter allows a log.Logger to be used with the local Logger interface\ntype LoggerAdapter struct {\n\tlog hclog.Logger\n}\n\n// Log a message to the contained debug log\nfunc (a *LoggerAdapter) Log(v ...interface{}) {\n\ta.log.Info(fmt.Sprint(v...))\n}\n\n// Logf will record a formatted message to the contained debug log\nfunc (a *LoggerAdapter) Logf(s string, v ...interface{}) {\n\ta.log.Info(fmt.Sprintf(s, v...))\n}\n\nfunc newRaftCluster(t *testing.T, logWriter io.Writer, namePrefix string, n uint, transportHooks TransportHooks) *cluster {\n\tres := make([]*raftNode, 0, n)\n\tnames := make([]string, 0, n)\n\tfor i := uint(0); i < n; i++ {\n\t\tnames = append(names, nodeName(namePrefix, i))\n\t}\n\tl := hclog.New(&hclog.LoggerOptions{\n\t\tOutput: logWriter,\n\t\tLevel:  hclog.DefaultLevel,\n\t})\n\ttransports := newTransports(l)\n\tfor _, i := range names {\n\n\t\tr, err := newRaftNode(hclog.New(&hclog.LoggerOptions{\n\t\t\tName:   i + \":\",\n\t\t\tOutput: logWriter,\n\t\t\tLevel:  hclog.DefaultLevel,\n\t\t}), transports, transportHooks, names, i)\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"Unable to create raftNode:%v : %v\", i, err)\n\t\t}\n\t\tres = append(res, r)\n\t}\n\treturn 
&cluster{\n\t\tnodes:        res,\n\t\tremovedNodes: make([]*raftNode, 0, n),\n\t\tapplied:      make([]appliedItem, 0, 1024),\n\t\tlog:          &LoggerAdapter{l},\n\t\ttransports:   transports,\n\t\thooks:        transportHooks,\n\t}\n}\n\nfunc (c *cluster) CreateAndAddNode(t *testing.T, logWriter io.Writer, namePrefix string, nodeNum uint) error {\n\tname := nodeName(namePrefix, nodeNum)\n\trn, err := newRaftNode(hclog.New(&hclog.LoggerOptions{\n\t\tName:   name + \":\",\n\t\tOutput: logWriter,\n\t\tLevel:  hclog.DefaultLevel,\n\t}), c.transports, c.hooks, nil, name)\n\tif err != nil {\n\t\tt.Fatalf(\"Unable to create raftNode:%v : %v\", name, err)\n\t}\n\tc.nodes = append(c.nodes, rn)\n\tf := c.Leader(time.Minute).raft.AddVoter(raft.ServerID(name), raft.ServerAddress(name), 0, 0)\n\treturn f.Error()\n}\n\nfunc nodeName(prefix string, num uint) string {\n\treturn fmt.Sprintf(\"%v_%d\", prefix, num)\n}\n\nfunc (c *cluster) RemoveNode(t *testing.T, name string) *raftNode {\n\tnc := make([]*raftNode, 0, len(c.nodes))\n\tvar nodeToRemove *raftNode\n\tfor _, rn := range c.nodes {\n\t\tif rn.name == name {\n\t\t\tnodeToRemove = rn\n\t\t} else {\n\t\t\tnc = append(nc, rn)\n\t\t}\n\t}\n\tif nodeToRemove == nil {\n\t\tt.Fatalf(\"Unable to find node with name '%v' in cluster\", name)\n\t}\n\tc.log.Logf(\"Removing node %v from cluster\", name)\n\tc.Leader(time.Minute).raft.RemovePeer(raft.ServerAddress(name)).Error()\n\tc.nodes = nc\n\tc.removedNodes = append(c.removedNodes, nodeToRemove)\n\treturn nodeToRemove\n}\n\n// Leader returns the node that is currently the Leader, if there is no\n// leader this function blocks until a leader is elected (or a timeout occurs)\nfunc (c *cluster) Leader(timeout time.Duration) *raftNode {\n\tstart := time.Now()\n\tfor true {\n\t\tfor _, n := range c.nodes {\n\t\t\tif n.raft.State() == raft.Leader {\n\t\t\t\treturn n\n\t\t\t}\n\t\t}\n\t\tif time.Now().Sub(start) > timeout {\n\t\t\treturn 
nil\n\t\t}\n\t\ttime.Sleep(time.Millisecond)\n\t}\n\treturn nil\n}\n\n// containsNode returns true if the slice 'nodes' contains 'n'\nfunc containsNode(nodes []*raftNode, n *raftNode) bool {\n\tfor _, rn := range nodes {\n\t\tif rn == n {\n\t\t\treturn true\n\t\t}\n\t}\n\treturn false\n}\n\n// LeaderPlus returns the leader + n additional nodes from the cluster\n// the leader is always the first node in the returned slice.\nfunc (c *cluster) LeaderPlus(n int) []*raftNode {\n\tr := make([]*raftNode, 0, n+1)\n\tldr := c.Leader(time.Second)\n\tif ldr != nil {\n\t\tr = append(r, ldr)\n\t}\n\tif len(r) >= n {\n\t\treturn r\n\t}\n\tfor _, node := range c.nodes {\n\t\tif !containsNode(r, node) {\n\t\t\tr = append(r, node)\n\t\t\tif len(r) >= n {\n\t\t\t\treturn r\n\t\t\t}\n\t\t}\n\t}\n\treturn r\n}\n\nfunc (c *cluster) Stop(t *testing.T, maxWait time.Duration) {\n\tc.WaitTilUptoDate(t, maxWait)\n\tfor _, n := range c.nodes {\n\t\tn.raft.Shutdown()\n\t}\n}\n\n// WaitTilUptoDate blocks until all nodes in the cluster have gotten their\n// committedIndex upto the Index from the last successful call to Apply\nfunc (c *cluster) WaitTilUptoDate(t *testing.T, maxWait time.Duration) {\n\tidx := c.lastApplySuccess.Index()\n\tstart := time.Now()\n\tfor true {\n\t\tallAtIdx := true\n\t\tfor i := 0; i < len(c.nodes); i++ {\n\t\t\tnodeAppliedIdx := c.nodes[i].raft.AppliedIndex()\n\t\t\tif nodeAppliedIdx < idx {\n\t\t\t\tallAtIdx = false\n\t\t\t\tbreak\n\t\t\t} else if nodeAppliedIdx > idx {\n\t\t\t\tallAtIdx = false\n\t\t\t\tidx = nodeAppliedIdx\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t\tif allAtIdx {\n\t\t\tt.Logf(\"All nodes have appliedIndex=%d\", idx)\n\t\t\treturn\n\t\t}\n\t\tif time.Now().Sub(start) > maxWait {\n\t\t\tt.Fatalf(\"Gave up waiting for all nodes to reach raft Index %d, [currently at %v]\", idx, c.appliedIndexes())\n\t\t}\n\t\ttime.Sleep(time.Millisecond * 10)\n\t}\n}\n\nfunc (c *cluster) appliedIndexes() map[string]uint64 {\n\tr := make(map[string]uint64, 
len(c.nodes))\n\tfor _, n := range c.nodes {\n\t\tr[n.name] = n.raft.AppliedIndex()\n\t}\n\treturn r\n}\n\nfunc (c *cluster) generateNApplies(s *applySource, n uint) [][]byte {\n\tdata := make([][]byte, n)\n\tfor i := uint(0); i < n; i++ {\n\t\tdata[i] = s.nextEntry()\n\t}\n\treturn data\n}\n\nfunc (c *cluster) leadershipTransfer(leaderTimeout time.Duration) raft.Future {\n\tldr := c.Leader(leaderTimeout)\n\treturn ldr.raft.LeadershipTransfer()\n}\n\ntype applyFutureWithData struct {\n\tfuture raft.ApplyFuture\n\tdata   []byte\n}\n\nfunc (c *cluster) sendNApplies(leaderTimeout time.Duration, data [][]byte) []applyFutureWithData {\n\tf := []applyFutureWithData{}\n\n\tldr := c.Leader(leaderTimeout)\n\tif ldr != nil {\n\t\tfor _, d := range data {\n\t\t\tf = append(f, applyFutureWithData{future: ldr.raft.Apply(d, time.Second), data: d})\n\t\t}\n\t}\n\treturn f\n}\n\nfunc (c *cluster) checkApplyFutures(futures []applyFutureWithData) uint64 {\n\tsuccess := uint64(0)\n\tfor _, a := range futures {\n\t\tif err := a.future.Error(); err == nil {\n\t\t\tsuccess++\n\t\t\tc.lastApplySuccess = a.future\n\t\t\tc.applied = append(c.applied, appliedItem{a.future.Index(), a.data})\n\t\t} else {\n\t\t\tc.lastApplyFailure = a.future\n\t\t}\n\t}\n\treturn success\n}\n\nfunc (c *cluster) ApplyN(t *testing.T, leaderTimeout time.Duration, s *applySource, n uint) uint64 {\n\tdata := c.generateNApplies(s, n)\n\tfutures := c.sendNApplies(leaderTimeout, data)\n\treturn c.checkApplyFutures(futures)\n}\n\nfunc (c *cluster) VerifyFSM(t *testing.T) {\n\texp := c.nodes[0].fsm\n\texpName := c.nodes[0].name\n\tfor i, n := range c.nodes {\n\t\tif i > 0 {\n\t\t\tif exp.lastIndex != n.fsm.lastIndex {\n\t\t\t\tt.Errorf(\"Node %v FSM lastIndex is %d, but Node %v FSM lastIndex is %d\", n.name, n.fsm.lastIndex, expName, exp.lastIndex)\n\t\t\t}\n\t\t\tif exp.lastTerm != n.fsm.lastTerm {\n\t\t\t\tt.Errorf(\"Node %v FSM lastTerm is %d, but Node %v FSM lastTerm is %d\", n.name, n.fsm.lastTerm, expName, 
exp.lastTerm)\n\t\t\t}\n\t\t\tif !bytes.Equal(exp.lastHash, n.fsm.lastHash) {\n\t\t\t\tt.Errorf(\"Node %v FSM lastHash is %v, but Node %v FSM lastHash is %v\", n.name, n.fsm.lastHash, expName, exp.lastHash)\n\t\t\t}\n\t\t}\n\t\tt.Logf(\"node %v final FSM hash is %v\", n.name, n.fsm.lastHash)\n\t}\n\tif t.Failed() {\n\t\tc.RecordState(t)\n\t}\n}\n\nfunc (c *cluster) RecordState(t *testing.T) {\n\ttd, _ := os.MkdirTemp(os.Getenv(\"TEST_FAIL_DIR\"), \"failure\")\n\tsd, _ := resolveDirectory(\"data\", false)\n\tcopyDir(td, sd)\n\tdump := func(n *raftNode) {\n\t\tnt := filepath.Join(td, n.name)\n\t\tos.Mkdir(nt, 0o777)\n\t\tn.fsm.WriteTo(filepath.Join(nt, \"fsm.txt\"))\n\t\tn.transport.DumpLog(nt)\n\t}\n\tfor _, n := range c.nodes {\n\t\tdump(n)\n\t}\n\tfor _, n := range c.removedNodes {\n\t\tdump(n)\n\t}\n\tfmt.Printf(\"State of failing cluster captured in %v\", td)\n}\n\nfunc copyDir(target, src string) {\n\tfilepath.Walk(src, func(path string, info os.FileInfo, err error) error {\n\t\trelPath := path[len(src):]\n\t\tif info.IsDir() {\n\t\t\treturn os.MkdirAll(filepath.Join(target, relPath), 0o777)\n\t\t}\n\t\treturn copyFile(filepath.Join(target, relPath), path)\n\t})\n}\n\nfunc copyFile(target, src string) error {\n\tr, err := os.Open(src)\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer r.Close()\n\tw, err := os.Create(target)\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer w.Close()\n\t_, err = io.Copy(w, r)\n\treturn err\n}\n\nfunc (c *cluster) VerifyLog(t *testing.T, applyCount uint64) {\n\tfi, _ := c.nodes[0].store.FirstIndex()\n\tli, _ := c.nodes[0].store.LastIndex()\n\tname := c.nodes[0].name\n\tfor _, n := range c.nodes {\n\t\tnfi, err := n.store.FirstIndex()\n\t\tif err != nil {\n\t\t\tt.Errorf(\"Failed to get FirstIndex of log for node %v: %v\", n.name, err)\n\t\t\tcontinue\n\t\t}\n\t\tif nfi != fi {\n\t\t\tt.Errorf(\"Node %v has FirstIndex of %d but node %v has %d\", n.name, nfi, name, fi)\n\t\t}\n\t\tnli, err := n.store.LastIndex()\n\t\tif err != nil 
{\n\t\t\tt.Errorf(\"Failed to get LastIndex of log for node %v: %v\", n.name, err)\n\t\t\tcontinue\n\t\t}\n\t\tif nli != li {\n\t\t\tt.Errorf(\"Node %v has LastIndex of %d, but node %v has %d\", n.name, nli, name, li)\n\t\t}\n\t\tif nli-nfi < applyCount {\n\t\t\tt.Errorf(\"Node %v Log contains %d entries, but should contain at least %d\", n.name, nli-nfi, applyCount)\n\t\t\tcontinue\n\t\t}\n\t\tvar term uint64\n\t\tfor i := fi; i <= li; i++ {\n\t\t\tvar nEntry raft.Log\n\t\t\tvar n0Entry raft.Log\n\t\t\tif err := c.nodes[0].store.GetLog(i, &n0Entry); err != nil {\n\t\t\t\tt.Errorf(\"Failed to log entry %d on node %v: %v\", i, name, err)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tif err := n.store.GetLog(i, &nEntry); err != nil {\n\t\t\t\tt.Errorf(\"Failed to log entry at log Index %d on node %v: %v\", i, n.name, err)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tif i != nEntry.Index {\n\t\t\t\tt.Errorf(\"Asked for Log Index %d from Store on node %v, but got index %d instead\", i, n.name, nEntry.Index)\n\t\t\t}\n\t\t\tif i == fi {\n\t\t\t\tterm = nEntry.Term\n\t\t\t} else {\n\t\t\t\tif nEntry.Term < term {\n\t\t\t\t\tt.Errorf(\"Node %v, Prior Log Entry was for term %d, but this log entry is for term %d, terms shouldn't go backwards\", n.name, term, nEntry.Term)\n\t\t\t\t}\n\t\t\t}\n\t\t\tterm = nEntry.Term\n\t\t\tassertLogEntryEqual(t, n.name, &n0Entry, &nEntry)\n\t\t}\n\t\t// the above checks the logs between the nodes, also check that the log\n\t\t// contains the items that Apply returned success for.\n\t\tvar entry raft.Log\n\t\tfor _, ai := range c.applied {\n\t\t\terr := n.store.GetLog(ai.index, &entry)\n\t\t\tif err != nil {\n\t\t\t\tt.Errorf(\"Failed to fetch logIndex %d on node %v: %v\", ai.index, n.name, err)\n\t\t\t}\n\t\t\tif !bytes.Equal(ai.data, entry.Data) {\n\t\t\t\tt.Errorf(\"Client applied %v at index %d, but log for node %v contains %d\", ai.data, ai.index, n.name, entry.Data)\n\t\t\t}\n\t\t}\n\t}\n}\n\n// assertLogEntryEqual compares the 2 raft Log entries and reports 
any differences to the supplied testing.T instance\n// it returns true if the 2 entries are equal, false otherwise.\nfunc assertLogEntryEqual(t *testing.T, node string, exp *raft.Log, act *raft.Log) bool {\n\tres := true\n\tif exp.Term != act.Term {\n\t\tt.Errorf(\"Log Entry at Index %d for node %v has mismatched terms %d/%d\", exp.Index, node, exp.Term, act.Term)\n\t\tres = false\n\t}\n\tif exp.Index != act.Index {\n\t\tt.Errorf(\"Node %v, Log Entry should be Index %d,but is %d\", node, exp.Index, act.Index)\n\t\tres = false\n\t}\n\tif exp.Type != act.Type {\n\t\tt.Errorf(\"Node %v, Log Entry at Index %d should have type %v but is %v\", node, exp.Index, exp.Type, act.Type)\n\t\tres = false\n\t}\n\tif !bytes.Equal(exp.Data, act.Data) {\n\t\tt.Errorf(\"Node %v, Log Entry at Index %d should have data %v, but has %v\", node, exp.Index, exp.Data, act.Data)\n\t\tres = false\n\t}\n\treturn res\n}\n"
  },
  {
    "path": "fuzzy/fsm.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"bufio\"\n\t\"encoding/binary\"\n\t\"fmt\"\n\t\"hash/adler32\"\n\t\"io\"\n\t\"os\"\n\n\t\"github.com/hashicorp/raft\"\n)\n\ntype logHash struct {\n\tlastHash []byte\n}\n\nfunc (l *logHash) Add(d []byte) {\n\thasher := adler32.New()\n\thasher.Write(l.lastHash)\n\thasher.Write(d)\n\tl.lastHash = hasher.Sum(nil)\n}\n\ntype applyItem struct {\n\tindex uint64\n\tterm  uint64\n\tdata  []byte\n}\n\nfunc (a *applyItem) set(l *raft.Log) {\n\ta.index = l.Index\n\ta.term = l.Term\n\ta.data = make([]byte, len(l.Data))\n\tcopy(a.data, l.Data)\n}\n\ntype fuzzyFSM struct {\n\tlogHash\n\tlastTerm  uint64\n\tlastIndex uint64\n\tapplied   []applyItem\n}\n\nfunc (f *fuzzyFSM) Apply(l *raft.Log) interface{} {\n\tif l.Index <= f.lastIndex {\n\t\tpanic(fmt.Errorf(\"fsm.Apply received log entry with invalid Index %v (lastIndex we saw was %d)\", l, f.lastIndex))\n\t}\n\tif l.Term < f.lastTerm {\n\t\tpanic(fmt.Errorf(\"fsm.Apply received log entry with invalid Term %v (lastTerm we saw was %d)\", l, f.lastTerm))\n\t}\n\tf.lastIndex = l.Index\n\tf.lastTerm = l.Term\n\tf.Add(l.Data)\n\tf.applied = append(f.applied, applyItem{})\n\tf.applied[len(f.applied)-1].set(l)\n\treturn nil\n}\n\nfunc (f *fuzzyFSM) WriteTo(fn string) error {\n\tfw, err := os.Create(fn)\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer fw.Close()\n\tw := bufio.NewWriter(fw)\n\tdefer w.Flush()\n\tfor _, i := range f.applied {\n\t\tfmt.Fprintf(w, \"%d.%8d: %X\\n\", i.term, i.index, i.data)\n\t}\n\treturn nil\n}\n\nfunc (f *fuzzyFSM) Snapshot() (raft.FSMSnapshot, error) {\n\ts := *f\n\treturn &s, nil\n}\n\nfunc (f *fuzzyFSM) Restore(r io.ReadCloser) error {\n\terr := binary.Read(r, binary.LittleEndian, &f.lastTerm)\n\tif err == nil {\n\t\terr = binary.Read(r, binary.LittleEndian, &f.lastIndex)\n\t}\n\tif err == nil {\n\t\tf.lastHash = make([]byte, adler32.Size)\n\t\t_, err = r.Read(f.lastHash)\n\t}\n\treturn 
err\n}\n\nfunc (f *fuzzyFSM) Persist(sink raft.SnapshotSink) error {\n\terr := binary.Write(sink, binary.LittleEndian, f.lastTerm)\n\tif err == nil {\n\t\terr = binary.Write(sink, binary.LittleEndian, f.lastIndex)\n\t}\n\tif err == nil {\n\t\t_, err = sink.Write(f.lastHash)\n\t}\n\treturn err\n}\n\nfunc (f *fuzzyFSM) Release() {\n}\n"
  },
  {
    "path": "fuzzy/fsm_batch.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\n//go:build batchtest\n\npackage fuzzy\n\nimport \"github.com/hashicorp/raft\"\n\n// ApplyBatch enables fuzzyFSM to satisfy the BatchingFSM interface. This\n// function is gated by the batchtest build flag.\nfunc (f *fuzzyFSM) ApplyBatch(logs []*raft.Log) []interface{} {\n\tret := make([]interface{}, len(logs))\n\n\tfor _, l := range logs {\n\t\tf.Apply(l)\n\t}\n\n\treturn ret\n}\n"
  },
  {
    "path": "fuzzy/go.mod",
    "content": "module github.com/hashicorp/raft/fuzzy\n\ngo 1.20\n\nrequire (\n\tgithub.com/hashicorp/go-hclog v1.6.3\n\tgithub.com/hashicorp/go-msgpack/v2 v2.1.2\n\tgithub.com/hashicorp/raft v1.2.0\n\tgithub.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea\n)\n\nrequire (\n\tgithub.com/armon/go-metrics v0.4.1 // indirect\n\tgithub.com/boltdb/bolt v1.3.1 // indirect\n\tgithub.com/fatih/color v1.13.0 // indirect\n\tgithub.com/hashicorp/go-immutable-radix v1.0.0 // indirect\n\tgithub.com/hashicorp/go-metrics v0.5.4 // indirect\n\tgithub.com/hashicorp/go-msgpack v0.5.5 // indirect\n\tgithub.com/hashicorp/golang-lru v0.5.0 // indirect\n\tgithub.com/mattn/go-colorable v0.1.12 // indirect\n\tgithub.com/mattn/go-isatty v0.0.14 // indirect\n\tgolang.org/x/sys v0.13.0 // indirect\n)\n\nreplace github.com/hashicorp/raft => ../\n"
  },
  {
    "path": "fuzzy/go.sum",
    "content": "cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=\ngithub.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=\ngithub.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=\ngithub.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=\ngithub.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=\ngithub.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=\ngithub.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=\ngithub.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=\ngithub.com/boltdb/bolt v1.3.1 h1:JQmyP4ZBrce+ZQu0dY660FMfatumYDLun9hBCUVIkF4=\ngithub.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps=\ngithub.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=\ngithub.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=\ngithub.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=\ngithub.com/davecgh/go-spew v1.1.1/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w=\ngithub.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=\ngithub.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=\ngithub.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=\ngithub.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=\ngithub.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=\ngithub.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=\ngithub.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=\ngithub.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=\ngithub.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=\ngithub.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=\ngithub.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=\ngithub.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=\ngithub.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/google/go-cmp v0.3.0/go.mod 
h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=\ngithub.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=\ngithub.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=\ngithub.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=\ngithub.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0=\ngithub.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=\ngithub.com/hashicorp/go-metrics v0.5.4 h1:8mmPiIJkTPPEbAiV97IxdAGNdRdaWwVap1BU6elejKY=\ngithub.com/hashicorp/go-metrics v0.5.4/go.mod h1:CG5yz4NZ/AI/aQt9Ucm/vdBnbh7fvmv4lxZ350i+QQI=\ngithub.com/hashicorp/go-msgpack v0.5.5 h1:i9R9JSrqIz0QVLz3sz+i3YJdT7TTSLcfLLzJi9aZTuI=\ngithub.com/hashicorp/go-msgpack v0.5.5/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=\ngithub.com/hashicorp/go-msgpack/v2 v2.1.2 h1:4Ee8FTp834e+ewB71RDrQ0VKpyFdrKOjvYtnQ/ltVj0=\ngithub.com/hashicorp/go-msgpack/v2 v2.1.2/go.mod h1:upybraOAblm4S7rx0+jeNy+CWWhzywQsSRV5033mMu4=\ngithub.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=\ngithub.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM=\ngithub.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=\ngithub.com/hashicorp/golang-lru v0.5.0/go.mod 
h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=\ngithub.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea h1:xykPFhrBAS2J0VBzVa5e80b5ZtYuNQtgXjN40qBZlD4=\ngithub.com/hashicorp/raft-boltdb v0.0.0-20171010151810-6e5ba93211ea/go.mod h1:pNv7Wc3ycL6F5oOWn+tPGo2gWD4a5X+yp/ntwdKLjRk=\ngithub.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=\ngithub.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=\ngithub.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=\ngithub.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=\ngithub.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=\ngithub.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=\ngithub.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=\ngithub.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=\ngithub.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40=\ngithub.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=\ngithub.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=\ngithub.com/mattn/go-isatty v0.0.14 
h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=\ngithub.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=\ngithub.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=\ngithub.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=\ngithub.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=\ngithub.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=\ngithub.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=\ngithub.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=\ngithub.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=\ngithub.com/prometheus/client_golang 
v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=\ngithub.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=\ngithub.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=\ngithub.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=\ngithub.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=\ngithub.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=\ngithub.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=\ngithub.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=\ngithub.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=\ngithub.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=\ngithub.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=\ngithub.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=\ngithub.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=\ngithub.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=\ngithub.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=\ngithub.com/stretchr/testify v1.4.0/go.mod 
h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=\ngithub.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=\ngithub.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=\ngithub.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=\ngolang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=\ngolang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=\ngolang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=\ngolang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys 
v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=\ngolang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.2/go.mod 
h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=\ngolang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngoogle.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=\ngoogle.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=\ngoogle.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=\ngoogle.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=\ngoogle.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=\ngoogle.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=\ngoogle.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=\ngopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\n"
  },
  {
    "path": "fuzzy/leadershiptransfer_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/raft\"\n)\n\n// 5 node cluster\nfunc TestRaft_FuzzyLeadershipTransfer(t *testing.T) {\n\tcluster := newRaftCluster(t, testLogWriter, \"lt\", 5, nil)\n\tr := rand.New(rand.NewSource(time.Now().UnixNano()))\n\n\ts := newApplySource(\"LeadershipTransfer\")\n\tdata := cluster.generateNApplies(s, uint(r.Intn(10000)))\n\tfutures := cluster.sendNApplies(time.Minute, data)\n\tcluster.leadershipTransfer(time.Minute)\n\n\tdata = cluster.generateNApplies(s, uint(r.Intn(10000)))\n\tfutures = append(futures, cluster.sendNApplies(time.Minute, data)...)\n\tcluster.leadershipTransfer(time.Minute)\n\n\tdata = cluster.generateNApplies(s, uint(r.Intn(10000)))\n\tfutures = append(futures, cluster.sendNApplies(time.Minute, data)...)\n\tcluster.leadershipTransfer(time.Minute)\n\n\tdata = cluster.generateNApplies(s, uint(r.Intn(10000)))\n\tfutures = append(futures, cluster.sendNApplies(time.Minute, data)...)\n\n\tac := cluster.checkApplyFutures(futures)\n\n\tcluster.Stop(t, time.Minute)\n\tcluster.VerifyLog(t, ac)\n\tcluster.VerifyFSM(t)\n}\n\ntype LeadershipTransferMode int\n\ntype LeadershipTransfer struct {\n\tverifier  appendEntriesVerifier\n\tslowNodes map[string]bool\n\tdelayMin  time.Duration\n\tdelayMax  time.Duration\n\tmode      LeadershipTransferMode\n}\n\nfunc (lt *LeadershipTransfer) Report(t *testing.T) {\n\tlt.verifier.Report(t)\n}\n\nfunc (lt *LeadershipTransfer) PreRPC(s, t string, r *raft.RPC) error {\n\treturn nil\n}\n\nfunc (lt *LeadershipTransfer) nap() {\n\td := lt.delayMin + time.Duration(rand.Int63n((lt.delayMax - lt.delayMin).Nanoseconds()))\n\ttime.Sleep(d)\n}\n\nfunc (lt *LeadershipTransfer) PostRPC(src, target string, r *raft.RPC, res *raft.RPCResponse) error {\n\treturn nil\n}\n\nfunc (lt *LeadershipTransfer) PreRequestVote(src, target string, v *raft.RequestVoteRequest) 
(*raft.RequestVoteResponse, error) {\n\treturn nil, nil\n}\n\nfunc (lt *LeadershipTransfer) PreAppendEntries(src, target string, v *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) {\n\tlt.verifier.PreAppendEntries(src, target, v)\n\treturn nil, nil\n}\n"
  },
  {
    "path": "fuzzy/membership_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"io\"\n\t\"log\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"testing\"\n\t\"time\"\n)\n\nvar testLogWriter io.Writer\n\nfunc init() {\n\ttestLogWriter = os.Stdout\n\tlogDir := os.Getenv(\"TEST_LOG_DIR\")\n\tif logDir != \"\" {\n\t\tf, err := os.Create(filepath.Join(logDir, \"debug.log\"))\n\t\tif err != nil {\n\t\t\tlog.Fatalf(\"TEST_LOG_DIR Env set, but unable to create log file: %v\\n\", err)\n\t\t}\n\t\ttestLogWriter = f\n\t}\n}\n\n// this runs a 3 node cluster then expands it to a 5 node cluster and checks all 5 nodes agree at the end\nfunc TestRaft_AddMembership(t *testing.T) {\n\tv := appendEntriesVerifier{}\n\tv.Init()\n\tcluster := newRaftCluster(t, testLogWriter, \"m\", 3, &v)\n\ts := newApplySource(\"AddMembership\")\n\tinitApplied := cluster.ApplyN(t, time.Minute, s, 100)\n\ta := s.apply(t, cluster, 1000)\n\tif err := cluster.CreateAndAddNode(t, testLogWriter, \"m\", 3); err != nil {\n\t\tt.Fatalf(\"Failed to add node m3: %v\", err)\n\t}\n\tif err := cluster.CreateAndAddNode(t, testLogWriter, \"m\", 4); err != nil {\n\t\tt.Fatalf(\"Failed to add node m4: %v\", err)\n\t}\n\ttime.Sleep(time.Second * 5)\n\ta.stop()\n\tcluster.Stop(t, time.Minute)\n\tv.Report(t)\n\tcluster.VerifyLog(t, uint64(a.applied+initApplied))\n\tcluster.VerifyFSM(t)\n}\n\n// starts with 3 nodes, goes to 5, then goes back to 3, but never removes the leader.\nfunc TestRaft_AddRemoveNodesNotLeader(t *testing.T) {\n\tv := appendEntriesVerifier{}\n\tv.Init()\n\tcluster := newRaftCluster(t, testLogWriter, \"ar\", 3, &v)\n\ts := newApplySource(\"AddRemoveNodesNotLeader\")\n\tinitApplied := cluster.ApplyN(t, time.Minute, s, 100)\n\ta := s.apply(t, cluster, 1000)\n\tcluster.CreateAndAddNode(t, testLogWriter, \"ar\", 3)\n\tcluster.CreateAndAddNode(t, testLogWriter, \"ar\", 4)\n\tldr := cluster.Leader(time.Minute)\n\tremoved := 0\n\tfor _, rn := range cluster.nodes {\n\t\tif rn.name != 
ldr.name {\n\t\t\tcluster.RemoveNode(t, rn.name)\n\t\t\tremoved++\n\t\t\tif removed >= 2 {\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\t}\n\ta.stop()\n\tcluster.Stop(t, time.Minute)\n\tv.Report(t)\n\tcluster.VerifyLog(t, uint64(a.applied+initApplied))\n\tcluster.VerifyFSM(t)\n}\n\n// starts with a 5 node cluster then removes the leader.\nfunc TestRaft_RemoveLeader(t *testing.T) {\n\tv := appendEntriesVerifier{}\n\tv.Init()\n\tcluster := newRaftCluster(t, testLogWriter, \"rl\", 5, &v)\n\ts := newApplySource(\"RemoveLeader\")\n\tinitApplied := cluster.ApplyN(t, time.Minute, s, 100)\n\ta := s.apply(t, cluster, 100)\n\ttime.Sleep(time.Second)\n\tldr := cluster.Leader(time.Minute)\n\tcluster.RemoveNode(t, ldr.name)\n\ttime.Sleep(5 * time.Second)\n\ta.stop()\n\tcluster.Stop(t, time.Minute)\n\tv.Report(t)\n\tcluster.VerifyLog(t, uint64(a.applied+initApplied))\n\tcluster.VerifyFSM(t)\n\tldr.raft.Shutdown()\n}\n\n// starts with a 5 node cluster, partitions off one node, and then removes it from the cluster on the other partition\nfunc TestRaft_RemovePartitionedNode(t *testing.T) {\n\thooks := NewPartitioner()\n\tcluster := newRaftCluster(t, testLogWriter, \"rmp\", 5, hooks)\n\ts := newApplySource(\"RemovePartitionedNode\")\n\tinitApplied := cluster.ApplyN(t, time.Minute, s, 101)\n\ta := s.apply(t, cluster, 100)\n\tnodes := cluster.LeaderPlus(3)\n\tvictim := nodes[len(nodes)-1]\n\thooks.PartitionOff(cluster.log, []*raftNode{victim})\n\ttime.Sleep(3 * time.Second)\n\tremoved := cluster.RemoveNode(t, victim.name)\n\ttime.Sleep(3 * time.Second)\n\thooks.HealAll(cluster.log)\n\ttime.Sleep(10 * time.Second)\n\ta.stop()\n\tcluster.Stop(t, time.Minute)\n\thooks.Report(t)\n\tcluster.VerifyLog(t, uint64(a.applied+initApplied))\n\tcluster.VerifyFSM(t)\n\n\t// we should verify that the partitioned node sees that it was removed & shut down\n\t// but it never gets notified of that, so we can't verify that currently.\n\tremoved.raft.Shutdown()\n}\n"
  },
  {
    "path": "fuzzy/node.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"fmt\"\n\t\"path/filepath\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\t\"github.com/hashicorp/raft\"\n\trdb \"github.com/hashicorp/raft-boltdb\"\n)\n\ntype raftNode struct {\n\ttransport *transport\n\tstore     *rdb.BoltStore\n\traft      *raft.Raft\n\tlog       hclog.Logger\n\tfsm       *fuzzyFSM\n\tname      string\n\tdir       string\n}\n\nfunc newRaftNode(logger hclog.Logger, tc *transports, h TransportHooks, nodes []string, name string) (*raftNode, error) {\n\tvar err error\n\tvar datadir string\n\tdatadir, err = resolveDirectory(fmt.Sprintf(\"data/%v\", name), true)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\tlogger.Info(\"[INFO] Creating new raft Node with data in dir %v\", datadir)\n\tvar ss *raft.FileSnapshotStore\n\tss, err = raft.NewFileSnapshotStoreWithLogger(datadir, 5, logger)\n\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"unable to initialize snapshots %v\", err.Error())\n\t}\n\ttransport := tc.AddNode(name, h)\n\n\tconfig := raft.DefaultConfig()\n\tconfig.SnapshotThreshold = 1409600\n\tconfig.SnapshotInterval = time.Hour\n\tconfig.Logger = logger\n\tconfig.ShutdownOnRemove = false\n\tconfig.LocalID = raft.ServerID(name)\n\n\tvar store *rdb.BoltStore\n\tstore, err = rdb.NewBoltStore(filepath.Join(datadir, \"store.bolt\"))\n\tif err != nil {\n\t\treturn nil, fmt.Errorf(\"unable to initialize log %v\", err.Error())\n\t}\n\n\tif len(nodes) > 0 {\n\t\tc := make([]raft.Server, 0, len(nodes))\n\t\tfor _, n := range nodes {\n\t\t\tc = append(c, raft.Server{Suffrage: raft.Voter, ID: raft.ServerID(n), Address: raft.ServerAddress(n)})\n\t\t}\n\t\tconfiguration := raft.Configuration{Servers: c}\n\n\t\tif err = raft.BootstrapCluster(config, store, store, ss, transport, configuration); err != nil {\n\t\t\treturn nil, err\n\t\t}\n\t}\n\tfsm := &fuzzyFSM{}\n\tvar r *raft.Raft\n\tr, err = raft.NewRaft(config, fsm, store, store, 
ss, transport)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\tn := raftNode{\n\t\ttransport: transport,\n\t\tstore:     store,\n\t\traft:      r,\n\t\tfsm:       fsm,\n\t\tlog:       logger,\n\t\tname:      name,\n\t\tdir:       datadir,\n\t}\n\treturn &n, nil\n}\n"
  },
  {
    "path": "fuzzy/partition_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"math/rand\"\n\t\"sync\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/raft\"\n)\n\n// 5 node cluster where the leader and another node get regularly partitioned off\n// eventually all partitions heal.\nfunc TestRaft_LeaderPartitions(t *testing.T) {\n\thooks := NewPartitioner()\n\tcluster := newRaftCluster(t, testLogWriter, \"lp\", 5, hooks)\n\tcluster.Leader(time.Second * 10)\n\ts := newApplySource(\"LeaderPartitions\")\n\tapplier := s.apply(t, cluster, 5)\n\tfor i := 0; i < 10; i++ {\n\t\tpg := hooks.PartitionOff(cluster.log, cluster.LeaderPlus(rand.Intn(4)))\n\t\ttime.Sleep(time.Second * 4)\n\t\tr := rand.Intn(10)\n\t\tif r < 1 {\n\t\t\tcluster.log.Logf(\"Healing no partitions!\")\n\t\t} else if r < 4 {\n\t\t\thooks.HealAll(cluster.log)\n\t\t} else {\n\t\t\thooks.Heal(cluster.log, pg)\n\t\t}\n\t\ttime.Sleep(time.Second * 5)\n\t}\n\thooks.HealAll(cluster.log)\n\tcluster.Leader(time.Hour)\n\tapplier.stop()\n\tcluster.Stop(t, time.Minute*10)\n\thooks.Report(t)\n\tcluster.VerifyLog(t, applier.applied)\n\tcluster.VerifyFSM(t)\n}\n\ntype Partitioner struct {\n\tverifier appendEntriesVerifier\n\tlock     sync.RWMutex // protects partitioned / nextGroup\n\t// this is a map of node -> partition group, only nodes in the same partition group can communicate with each other\n\tpartitioned map[string]int\n\tnextGroup   int\n}\n\nfunc NewPartitioner() *Partitioner {\n\tp := &Partitioner{\n\t\tpartitioned: make(map[string]int),\n\t\tnextGroup:   1,\n\t}\n\tp.verifier.Init()\n\treturn p\n}\n\n// PartitionOff creates a partition where the supplied nodes can only communicate with each other\n// returns the partition group, which can be used later with Heal to heal this specific partition\nfunc (p *Partitioner) PartitionOff(l Logger, nodes []*raftNode) int {\n\tnn := make([]string, 0, len(nodes))\n\tp.lock.Lock()\n\tdefer 
p.lock.Unlock()\n\tpGroup := p.nextGroup\n\tp.nextGroup++\n\tfor _, n := range nodes {\n\t\tp.partitioned[n.name] = pGroup\n\t\tnn = append(nn, n.name)\n\t}\n\tl.Logf(\"Created partition %d with nodes %v, partitions now are %v\", pGroup, nn, p)\n\treturn pGroup\n}\n\nfunc (p *Partitioner) Heal(l Logger, pGroup int) {\n\tp.lock.Lock()\n\tdefer p.lock.Unlock()\n\tfor k, v := range p.partitioned {\n\t\tif v == pGroup {\n\t\t\tp.partitioned[k] = 0\n\t\t}\n\t}\n\tl.Logf(\"Healing partition group %d, now partitions are %v\", pGroup, p)\n}\n\nfunc (p *Partitioner) String() string {\n\tpl := make([][]string, 0, 10)\n\tfor n, pv := range p.partitioned {\n\t\tif pv > 0 {\n\t\t\tfor pv >= len(pl) {\n\t\t\t\tpl = append(pl, nil)\n\t\t\t}\n\t\t\tpl[pv] = append(pl[pv], n)\n\t\t}\n\t}\n\tb := bytes.Buffer{}\n\tfor i, n := range pl {\n\t\tif len(n) > 0 {\n\t\t\tif b.Len() > 0 {\n\t\t\t\tb.WriteString(\", \")\n\t\t\t}\n\t\t\tfmt.Fprintf(&b, \"%d = %v\", i, n)\n\t\t}\n\t}\n\tif b.Len() == 0 {\n\t\treturn \"[None]\"\n\t}\n\treturn b.String()\n}\n\nfunc (p *Partitioner) HealAll(l Logger) {\n\tp.lock.Lock()\n\tdefer p.lock.Unlock()\n\tp.partitioned = make(map[string]int)\n\tl.Logf(\"Healing all partitions, partitions now %v\", p)\n}\n\nfunc (p *Partitioner) Report(t *testing.T) {\n\tp.verifier.Report(t)\n}\n\nfunc (p *Partitioner) PreRPC(s, t string, r *raft.RPC) error {\n\tp.lock.RLock()\n\tsp := p.partitioned[s]\n\tst := p.partitioned[t]\n\tp.lock.RUnlock()\n\tif sp == st {\n\t\treturn nil\n\t}\n\treturn fmt.Errorf(\"unable to connect to %v, from %v\", t, s)\n}\n\nfunc (p *Partitioner) PostRPC(s, t string, req *raft.RPC, res *raft.RPCResponse) error {\n\treturn nil\n}\n\nfunc (p *Partitioner) PreRequestVote(src, target string, v *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) {\n\treturn nil, nil\n}\n\nfunc (p *Partitioner) PreAppendEntries(src, target string, v *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) {\n\treturn nil, nil\n}\n"
  },
  {
    "path": "fuzzy/readme.md",
    "content": "# Fuzzy Raft\n\nInspired by http://colin-scott.github.io/blog/2015/10/07/fuzzing-raft-for-fun-and-profit/ this package \nis a framework and set of test scenarios for testing the behavior and correctness of the raft library\nunder various conditions.\n\n## Framework\n\nThe framework allows you to construct multiple node raft clusters, connected by an instrumented transport \nthat allows a test to inject various transport level behaviors to simulate various scenarios (e.g. you \ncan have your hook fail all transport calls to a particular node to simulate it being partitioned off \nthe network). There are helper classes to create and Apply well know sequences of test data, and to \nexamine the final state of the cluster, the nodes FSMs and the raft log. \n\n## Running\n\nThe tests run with the standard go test framework, run with go test . [from this dir] or use make fuzz from\nthe parent directory. As these tests are looking for timing and other edge cases, a pass from a single run\nisn't enough, the tests needs running repeatedly to build up confidence.\n\n## Test Scenarios\n\nThe follow test scenario's are currently implemented. Each test concludes with a standard set of validations\n\n * Each node raft log contains the same set of entries (term/index/data).\n * The raft log contains data matching the client request for each call to raft.Apply() that reported success.\n * Each node's FSM saw the same sequence of Apply(*raft.Log) calls.\n * A verifier at the transport level verifies a number of transport level invariants.\n\nMost tests run with a background workload that is constantly apply()ing new entries to the log. [when there's a leader]\n\n### TestRaft_LeaderPartitions\n\nThis creates a 5 node cluster and then repeated partitions multiple nodes off (including the current leader), \nthen heals the partition and repeats. At the end all partitions are removed. 
[clearly inspired by Jepsen]\n\n### TestRaft_NoIssueSanity\n\nA basic 5 node cluster test: it starts a 5 node cluster, applies some data, then does the verifications.\n\n### TestRaft_SlowSendVote\n\nTests what happens when RequestVote requests are delayed in being sent to other nodes.\n\n### TestRaft_SlowRecvVote\n\nTests what happens when RequestVote responses are delayed in being received by the sender.\n\n### TestRaft_AddMembership\n\nStarts a 3 node cluster, and then adds 2 new members to the cluster.\n\n### TestRaft_AddRemoveNodesNotLeader\n\nStarts a 3 node cluster, adds 2 new members to make it a 5 node cluster, and then removes 2 follower nodes from the cluster.\n\n### TestRaft_RemoveLeader\n\nStarts a 5 node cluster, and then removes the node that is the leader.\n\n### TestRaft_RemovePartitionedNode\n\nStarts a 5 node cluster, partitions one of the follower nodes off the network, and then tells the leader to remove that node, then heals the partition.\n"
  },
  {
    "path": "fuzzy/resolve.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"os\"\n\t\"path/filepath\"\n)\n\n// resolveDirectory returns a full directory path based on the supplied dir path\n// if the supplied dir path is absolute (i.e. it starts with / ) then it is\n// returned as is, if it's a relative path, then it is assumed to be relative\n// to the executable, and that is computed and returned.\n//\n// if create is true, then the directory path will be created if it doesn't\n// already exist\n//\n// if create is false, then it's upto the caller to ensure it exists and/or\n// create it as needed [this won't verify that it exists]\nfunc resolveDirectory(dir string, create bool) (string, error) {\n\tvar resolved string\n\tif filepath.IsAbs(dir) {\n\t\tresolved = dir\n\t} else {\n\t\texecdir, err := filepath.Abs(filepath.Dir(os.Args[0]))\n\t\tif err != nil {\n\t\t\treturn \"\", err\n\t\t}\n\t\tresolved = filepath.Join(execdir, dir)\n\t}\n\tif create {\n\t\tif _, err := os.Stat(resolved); os.IsNotExist(err) {\n\t\t\tif err := os.MkdirAll(resolved, 0o744); err != nil {\n\t\t\t\treturn \"\", err\n\t\t\t}\n\t\t}\n\t}\n\treturn resolved, nil\n}\n"
  },
  {
    "path": "fuzzy/simple_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"testing\"\n\t\"time\"\n)\n\n// this runs a 5 node cluster with verifications turned on, but no failures or issues injected.\nfunc TestRaft_NoIssueSanity(t *testing.T) {\n\tv := appendEntriesVerifier{}\n\tv.Init()\n\tcluster := newRaftCluster(t, testLogWriter, \"node\", 5, &v)\n\ts := newApplySource(\"NoIssueSanity\")\n\tapplyCount := cluster.ApplyN(t, time.Minute, s, 10000)\n\tcluster.Stop(t, time.Minute)\n\tv.Report(t)\n\tcluster.VerifyLog(t, applyCount)\n\tcluster.VerifyFSM(t)\n}\n"
  },
  {
    "path": "fuzzy/slowvoter_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"math/rand\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/raft\"\n)\n\n// 5 node cluster where 2 nodes always see a delay in getting a request vote msg.\nfunc TestRaft_SlowSendVote(t *testing.T) {\n\thooks := NewSlowVoter(\"sv_0\", \"sv_1\")\n\tcluster := newRaftCluster(t, testLogWriter, \"sv\", 5, hooks)\n\ts := newApplySource(\"SlowSendVote\")\n\tac := cluster.ApplyN(t, time.Minute, s, 10000)\n\tcluster.Stop(t, time.Minute)\n\thooks.Report(t)\n\tcluster.VerifyLog(t, ac)\n\tcluster.VerifyFSM(t)\n}\n\n// 5 node cluster where vote results from 3 nodes are slow to turn up.\n// [they see the vote request normally, but their response is slow]\nfunc TestRaft_SlowRecvVote(t *testing.T) {\n\thooks := NewSlowVoter(\"svr_1\", \"svr_4\", \"svr_3\")\n\thooks.mode = SlowRecv\n\tcluster := newRaftCluster(t, testLogWriter, \"svr\", 5, hooks)\n\ts := newApplySource(\"SlowRecvVote\")\n\tac := cluster.ApplyN(t, time.Minute, s, 10000)\n\tcluster.Stop(t, time.Minute)\n\thooks.Report(t)\n\tcluster.VerifyLog(t, ac)\n\tcluster.VerifyFSM(t)\n}\n\ntype SlowVoterMode int\n\nconst (\n\tSlowSend SlowVoterMode = iota\n\tSlowRecv\n)\n\ntype SlowVoter struct {\n\tverifier  appendEntriesVerifier\n\tslowNodes map[string]bool\n\tdelayMin  time.Duration\n\tdelayMax  time.Duration\n\tmode      SlowVoterMode\n}\n\nfunc NewSlowVoter(slowNodes ...string) *SlowVoter {\n\tsv := SlowVoter{\n\t\tslowNodes: make(map[string]bool, len(slowNodes)),\n\t\tdelayMin:  time.Second,\n\t\tdelayMax:  time.Second * 2,\n\t\tmode:      SlowSend,\n\t}\n\tfor _, n := range slowNodes {\n\t\tsv.slowNodes[n] = true\n\t}\n\tsv.verifier.Init()\n\treturn &sv\n}\n\nfunc (sv *SlowVoter) Report(t *testing.T) {\n\tsv.verifier.Report(t)\n}\n\nfunc (sv *SlowVoter) PreRPC(s, t string, r *raft.RPC) error {\n\treturn nil\n}\n\nfunc (sv *SlowVoter) nap() {\n\td := sv.delayMin + 
time.Duration(rand.Int63n((sv.delayMax - sv.delayMin).Nanoseconds()))\n\ttime.Sleep(d)\n}\n\nfunc (sv *SlowVoter) PostRPC(src, target string, r *raft.RPC, res *raft.RPCResponse) error {\n\tif sv.mode == SlowRecv && sv.slowNodes[target] {\n\t\t_, ok := r.Command.(*raft.RequestVoteRequest)\n\t\tif ok {\n\t\t\tsv.nap()\n\t\t}\n\t}\n\treturn nil\n}\n\nfunc (sv *SlowVoter) PreRequestVote(src, target string, v *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) {\n\tif sv.mode == SlowSend && sv.slowNodes[target] {\n\t\tsv.nap()\n\t}\n\treturn nil, nil\n}\n\nfunc (sv *SlowVoter) PreAppendEntries(src, target string, v *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) {\n\tsv.verifier.PreAppendEntries(src, target, v)\n\treturn nil, nil\n}\n"
  },
  {
    "path": "fuzzy/transport.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"bufio\"\n\t\"bytes\"\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"sync\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\t\"github.com/hashicorp/go-msgpack/v2/codec\"\n\t\"github.com/hashicorp/raft\"\n)\n\nvar codecHandle codec.MsgpackHandle\n\ntype appendEntries struct {\n\tsource      string\n\ttarget      raft.ServerAddress\n\tterm        uint64\n\tfirstIndex  uint64\n\tlastIndex   uint64\n\tcommitIndex uint64\n}\n\ntype transports struct {\n\tsync.RWMutex\n\tnodes map[string]*transport\n\tlog   hclog.Logger\n}\n\nfunc newTransports(log hclog.Logger) *transports {\n\treturn &transports{\n\t\tnodes: make(map[string]*transport),\n\t\tlog:   log,\n\t}\n}\n\nfunc (tc *transports) AddNode(n string, hooks TransportHooks) *transport {\n\tt := newTransport(n, tc, hooks)\n\tt.log = tc.log\n\ttc.Lock()\n\tdefer tc.Unlock()\n\ttc.nodes[n] = t\n\treturn t\n}\n\n// TransportHooks allow a test to customize the behavior of the transport.\n// [if you return an error from a PreXXX call, then the error is returned to the caller, and the RPC never made]\ntype TransportHooks interface {\n\t// PreRPC is called before every single RPC call from the transport\n\tPreRPC(src, target string, r *raft.RPC) error\n\t// PostRPC is called after the RPC call has been processed by the target, but before the source see's the response\n\tPostRPC(src, target string, r *raft.RPC, result *raft.RPCResponse) error\n\t// PreREquestVote is called before sending a RequestVote RPC request.\n\tPreRequestVote(src, target string, r *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error)\n\t// PreAppendEntries is called before sending an AppendEntries RPC request.\n\tPreAppendEntries(src, target string, r *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error)\n}\n\ntype transport struct {\n\tlog        hclog.Logger\n\ttransports 
*transports\n\tnode       string\n\tae         []appendEntries\n\n\tconsumer chan raft.RPC\n\thooks    TransportHooks\n}\n\nfunc newTransport(node string, tc *transports, hooks TransportHooks) *transport {\n\treturn &transport{\n\t\tnode:       node,\n\t\ttransports: tc,\n\t\thooks:      hooks,\n\t\tconsumer:   make(chan raft.RPC),\n\t\tae:         make([]appendEntries, 0, 50000),\n\t}\n}\n\n// Consumer returns a channel that can be used to\n// consume and respond to RPC requests.\nfunc (t *transport) Consumer() <-chan raft.RPC {\n\treturn t.consumer\n}\n\n// LocalAddr is used to return our local address to distinguish from our peers.\nfunc (t *transport) LocalAddr() raft.ServerAddress {\n\treturn raft.ServerAddress(t.node)\n}\n\nfunc (t *transport) sendRPC(target string, req interface{}, resp interface{}) error {\n\tt.transports.RLock()\n\ttt := t.transports.nodes[target]\n\tif tt == nil {\n\t\tt.log.Info(\"sendRPC unknown node\", \"target\", target, \"transports\", t.transports.nodes)\n\t\tt.transports.RUnlock()\n\t\treturn fmt.Errorf(\"unknown target host %v\", target)\n\t}\n\tt.transports.RUnlock()\n\trc := make(chan raft.RPCResponse, 1)\n\n\tbuff := bytes.Buffer{}\n\tif err := codec.NewEncoder(&buff, &codecHandle).Encode(req); err != nil {\n\t\treturn err\n\t}\n\trpc := raft.RPC{RespChan: rc}\n\tvar reqVote raft.RequestVoteRequest\n\tvar timeoutNow raft.TimeoutNowRequest\n\tvar appEnt raft.AppendEntriesRequest\n\tdec := codec.NewDecoderBytes(buff.Bytes(), &codecHandle)\n\tswitch req.(type) {\n\tcase *raft.TimeoutNowRequest:\n\t\tif err := dec.Decode(&timeoutNow); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &timeoutNow\n\tcase *raft.RequestVoteRequest:\n\t\tif err := dec.Decode(&reqVote); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &reqVote\n\tcase *raft.AppendEntriesRequest:\n\t\tif err := dec.Decode(&appEnt); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &appEnt\n\tdefault:\n\t\tt.log.Warn(\"unexpected request type\", 
\"type\", hclog.Fmt(\"%T\", req), \"request\", req)\n\t}\n\tvar result *raft.RPCResponse\n\tif t.hooks != nil {\n\t\tif err := t.hooks.PreRPC(t.node, target, &rpc); err != nil {\n\t\t\treturn err\n\t\t}\n\t\tswitch req.(type) {\n\t\tcase *raft.RequestVoteRequest:\n\t\t\thr, err := t.hooks.PreRequestVote(t.node, target, &reqVote)\n\t\t\tif hr != nil || err != nil {\n\t\t\t\tresult = &raft.RPCResponse{Response: hr, Error: err}\n\t\t\t}\n\t\tcase *raft.AppendEntriesRequest:\n\t\t\thr, err := t.hooks.PreAppendEntries(t.node, target, &appEnt)\n\t\t\tif hr != nil || err != nil {\n\t\t\t\tresult = &raft.RPCResponse{Response: hr, Error: err}\n\t\t\t}\n\t\t}\n\t}\n\tif result == nil {\n\t\ttt.consumer <- rpc\n\t\tcr := <-rc\n\t\tresult = &cr\n\t}\n\n\tif t.hooks != nil {\n\t\terr := t.hooks.PostRPC(t.node, target, &rpc, result)\n\t\tif err != nil {\n\t\t\tresult.Error = err\n\t\t}\n\t}\n\tbuff = bytes.Buffer{}\n\tcodec.NewEncoder(&buff, &codecHandle).Encode(result.Response)\n\tcodec.NewDecoderBytes(buff.Bytes(), &codecHandle).Decode(resp)\n\treturn result.Error\n}\n\n// TimeoutNow implements the Transport interface.\nfunc (t *transport) TimeoutNow(id raft.ServerID, target raft.ServerAddress, args *raft.TimeoutNowRequest, resp *raft.TimeoutNowResponse) error {\n\treturn t.sendRPC(string(target), args, resp)\n}\n\n// AppendEntries sends the appropriate RPC to the target node.\nfunc (t *transport) AppendEntries(id raft.ServerID, target raft.ServerAddress, args *raft.AppendEntriesRequest, resp *raft.AppendEntriesResponse) error {\n\tae := appendEntries{\n\t\tsource:      t.node,\n\t\ttarget:      target,\n\t\tfirstIndex:  firstIndex(args),\n\t\tlastIndex:   lastIndex(args),\n\t\tcommitIndex: args.LeaderCommitIndex,\n\t}\n\tif len(t.ae) < cap(t.ae) {\n\t\tt.ae = append(t.ae, ae)\n\t}\n\treturn t.sendRPC(string(target), args, resp)\n}\n\nfunc (t *transport) DumpLog(dir string) {\n\tfw, _ := os.Create(filepath.Join(dir, t.node+\".transport\"))\n\tw := bufio.NewWriter(fw)\n\tfor i 
:= range t.ae {\n\t\te := &t.ae[i]\n\t\tfmt.Fprintf(w, \"%v -> %v\\t%8d - %8d : %8d\\n\", e.source, e.target, e.firstIndex, e.lastIndex, e.commitIndex)\n\t}\n\tw.Flush()\n\tfw.Close()\n}\n\nfunc firstIndex(a *raft.AppendEntriesRequest) uint64 {\n\tif len(a.Entries) == 0 {\n\t\treturn 0\n\t}\n\treturn a.Entries[0].Index\n}\n\nfunc lastIndex(a *raft.AppendEntriesRequest) uint64 {\n\tif len(a.Entries) == 0 {\n\t\treturn 0\n\t}\n\treturn a.Entries[len(a.Entries)-1].Index\n}\n\n// RequestVote sends the appropriate RPC to the target node.\nfunc (t *transport) RequestVote(id raft.ServerID, target raft.ServerAddress, args *raft.RequestVoteRequest, resp *raft.RequestVoteResponse) error {\n\treturn t.sendRPC(string(target), args, resp)\n}\n\n// RequestPreVote sends the appropriate RPC to the target node.\nfunc (t *transport) RequestPreVote(id raft.ServerID, target raft.ServerAddress, args *raft.RequestPreVoteRequest, resp *raft.RequestPreVoteResponse) error {\n\treturn t.sendRPC(string(target), args, resp)\n}\n\n// InstallSnapshot is used to push a snapshot down to a follower. The data is read from\n// the ReadCloser and streamed to the client.\nfunc (t *transport) InstallSnapshot(id raft.ServerID, target raft.ServerAddress, args *raft.InstallSnapshotRequest, resp *raft.InstallSnapshotResponse, data io.Reader) error {\n\tt.log.Debug(\"INSTALL SNAPSHOT *************************************\")\n\treturn errors.New(\"huh\")\n}\n\n// EncodePeer is used to serialize a peer name.\nfunc (t *transport) EncodePeer(id raft.ServerID, p raft.ServerAddress) []byte {\n\treturn []byte(p)\n}\n\n// DecodePeer is used to deserialize a peer name.\nfunc (t *transport) DecodePeer(p []byte) raft.ServerAddress {\n\treturn raft.ServerAddress(p)\n}\n\n// SetHeartbeatHandler is used to setup a heartbeat handler\n// as a fast-pass. This is to avoid head-of-line blocking from\n// disk IO. 
If a Transport does not support this, it can simply\n// ignore the call, and push the heartbeat onto the Consumer channel.\nfunc (t *transport) SetHeartbeatHandler(cb func(rpc raft.RPC)) {\n}\n\n// AppendEntriesPipeline returns an interface that can be used to pipeline\n// AppendEntries requests.\nfunc (t *transport) AppendEntriesPipeline(id raft.ServerID, target raft.ServerAddress) (raft.AppendPipeline, error) {\n\tp := &pipeline{\n\t\tt:        t,\n\t\tid:       id,\n\t\ttarget:   target,\n\t\twork:     make(chan *appendEntry, 100),\n\t\tconsumer: make(chan raft.AppendFuture, 100),\n\t}\n\tgo p.run()\n\treturn p, nil\n}\n\ntype appendEntry struct {\n\treq      *raft.AppendEntriesRequest\n\tres      *raft.AppendEntriesResponse\n\tstart    time.Time\n\terr      error\n\tready    chan error\n\tconsumer chan raft.AppendFuture\n}\n\nfunc (e *appendEntry) Request() *raft.AppendEntriesRequest {\n\treturn e.req\n}\n\nfunc (e *appendEntry) Response() *raft.AppendEntriesResponse {\n\t<-e.ready\n\treturn e.res\n}\n\nfunc (e *appendEntry) Start() time.Time {\n\treturn e.start\n}\n\nfunc (e *appendEntry) Error() error {\n\t<-e.ready\n\treturn e.err\n}\n\nfunc (e *appendEntry) Respond(err error) {\n\te.err = err\n\tclose(e.ready)\n\te.consumer <- e\n}\n\ntype pipeline struct {\n\tt        *transport\n\ttarget   raft.ServerAddress\n\tid       raft.ServerID\n\twork     chan *appendEntry\n\tconsumer chan raft.AppendFuture\n}\n\nfunc (p *pipeline) run() {\n\tfor ap := range p.work {\n\t\terr := p.t.AppendEntries(p.id, p.target, ap.req, ap.res)\n\t\tap.Respond(err)\n\t}\n}\n\n// AppendEntries is used to add another request to the pipeline.\n// The send may block which is an effective form of back-pressure.\nfunc (p *pipeline) AppendEntries(args *raft.AppendEntriesRequest, resp *raft.AppendEntriesResponse) (raft.AppendFuture, error) {\n\te := &appendEntry{\n\t\treq:      args,\n\t\tres:      resp,\n\t\tstart:    time.Now(),\n\t\tready:    make(chan error),\n\t\tconsumer: 
p.consumer,\n\t}\n\tp.work <- e\n\treturn e, nil\n}\n\nfunc (p *pipeline) Consumer() <-chan raft.AppendFuture {\n\treturn p.consumer\n}\n\n// Closes pipeline and cancels all inflight RPCs\nfunc (p *pipeline) Close() error {\n\tclose(p.work)\n\treturn nil\n}\n"
  },
  {
    "path": "fuzzy/verifier.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage fuzzy\n\nimport (\n\t\"fmt\"\n\t\"sync\"\n\t\"testing\"\n\n\t\"github.com/hashicorp/raft\"\n)\n\n// AppendEntriesVerifier looks at all the AppendEntry RPC request and verifies that only one node sends AE requests for any given term\n// it also verifies that the request only comes from the node indicated as the leader in the AE message.\ntype appendEntriesVerifier struct {\n\tsync.RWMutex\n\tleaderForTerm map[uint64]string\n\terrors        []string\n}\n\nfunc (v *appendEntriesVerifier) Report(t *testing.T) {\n\tv.Lock()\n\tdefer v.Unlock()\n\tfor _, e := range v.errors {\n\t\tt.Error(e)\n\t}\n}\n\nfunc (v *appendEntriesVerifier) Init() {\n\tv.Lock()\n\tdefer v.Unlock()\n\tv.leaderForTerm = make(map[uint64]string)\n\tv.errors = make([]string, 0, 10)\n}\n\nfunc (v *appendEntriesVerifier) PreRPC(src, target string, r *raft.RPC) error {\n\treturn nil\n}\n\nfunc (v *appendEntriesVerifier) PostRPC(src, target string, req *raft.RPC, res *raft.RPCResponse) error {\n\treturn nil\n}\n\nfunc (v *appendEntriesVerifier) PreRequestVote(src, target string, rv *raft.RequestVoteRequest) (*raft.RequestVoteResponse, error) {\n\treturn nil, nil\n}\n\nfunc (v *appendEntriesVerifier) PreAppendEntries(src, target string, req *raft.AppendEntriesRequest) (*raft.AppendEntriesResponse, error) {\n\tterm := req.Term\n\tvar ldr string\n\tif len(req.RPCHeader.Addr) > 0 {\n\t\tldr = string(req.RPCHeader.Addr)\n\t} else {\n\t\tldr = string(req.Leader)\n\t}\n\n\tif ldr != src {\n\t\tv.Lock()\n\t\tdefer v.Unlock()\n\t\tv.errors = append(v.errors, fmt.Sprintf(\"Node %v sent an appendEntries request for term %d that said the leader was some other node %v\", src, term, ldr))\n\t}\n\tv.RLock()\n\ttl, exists := v.leaderForTerm[term]\n\tv.RUnlock()\n\tif exists && tl != ldr {\n\t\tv.Lock()\n\t\tdefer v.Unlock()\n\t\tv.errors = append(v.errors, fmt.Sprintf(\"Node %v sent an AppendEntries request for term %d, 
but node %v had already done some, multiple leaders for same term!\", src, term, tl))\n\t}\n\tif !exists {\n\t\tv.Lock()\n\t\ttl, exists := v.leaderForTerm[term]\n\t\tif exists && tl != ldr {\n\t\t\tv.errors = append(v.errors, fmt.Sprintf(\"Node %v sent an AppendEntries request for term %d, but node %v had already done some, multiple leaders for same term!\", src, term, tl))\n\t\t}\n\t\tif !exists {\n\t\t\tv.leaderForTerm[term] = ldr\n\t\t}\n\t\tv.Unlock()\n\t}\n\treturn nil, nil\n}\n"
  },
  {
    "path": "go.mod",
    "content": "module github.com/hashicorp/raft\n\ngo 1.24.0\n\nretract v1.1.3 // Deleted original tag; module checksum may not be accurate.\n\nrequire (\n\tgithub.com/hashicorp/go-hclog v1.6.3\n\tgithub.com/hashicorp/go-metrics v0.5.4\n\tgithub.com/hashicorp/go-msgpack/v2 v2.1.5\n\tgithub.com/stretchr/testify v1.11.1\n)\n\nrequire (\n\tgithub.com/armon/go-metrics v0.4.1 // indirect\n\tgithub.com/davecgh/go-spew v1.1.1 // indirect\n\tgithub.com/fatih/color v1.13.0 // indirect\n\tgithub.com/hashicorp/go-immutable-radix v1.0.0 // indirect\n\tgithub.com/hashicorp/golang-lru v0.5.0 // indirect\n\tgithub.com/kr/pretty v0.2.1 // indirect\n\tgithub.com/mattn/go-colorable v0.1.12 // indirect\n\tgithub.com/mattn/go-isatty v0.0.14 // indirect\n\tgithub.com/pmezard/go-difflib v1.0.0 // indirect\n\tgolang.org/x/sys v0.13.0 // indirect\n\tgopkg.in/yaml.v3 v3.0.1 // indirect\n)\n"
  },
  {
    "path": "go.sum",
    "content": "cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=\ngithub.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=\ngithub.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=\ngithub.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=\ngithub.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=\ngithub.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=\ngithub.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=\ngithub.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=\ngithub.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=\ngithub.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=\ngithub.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w=\ngithub.com/fatih/color v1.13.0/go.mod 
h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=\ngithub.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=\ngithub.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=\ngithub.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=\ngithub.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=\ngithub.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=\ngithub.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=\ngithub.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=\ngithub.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=\ngithub.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=\ngithub.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=\ngithub.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=\ngithub.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.4.0/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=\ngithub.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=\ngithub.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=\ngithub.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=\ngithub.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0=\ngithub.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=\ngithub.com/hashicorp/go-metrics v0.5.4 h1:8mmPiIJkTPPEbAiV97IxdAGNdRdaWwVap1BU6elejKY=\ngithub.com/hashicorp/go-metrics v0.5.4/go.mod h1:CG5yz4NZ/AI/aQt9Ucm/vdBnbh7fvmv4lxZ350i+QQI=\ngithub.com/hashicorp/go-msgpack/v2 v2.1.5 h1:Ue879bPnutj/hXfmUk6s/jtIK90XxgiUIcXRl656T44=\ngithub.com/hashicorp/go-msgpack/v2 v2.1.5/go.mod h1:bjCsRXpZ7NsJdk45PoCQnzRGDaK8TKm5ZnDI/9y3J4M=\ngithub.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=\ngithub.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM=\ngithub.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=\ngithub.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=\ngithub.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=\ngithub.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=\ngithub.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.10/go.mod 
h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=\ngithub.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=\ngithub.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=\ngithub.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=\ngithub.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=\ngithub.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=\ngithub.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=\ngithub.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=\ngithub.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=\ngithub.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40=\ngithub.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=\ngithub.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=\ngithub.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=\ngithub.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=\ngithub.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=\ngithub.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/concurrent 
v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=\ngithub.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=\ngithub.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=\ngithub.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=\ngithub.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=\ngithub.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=\ngithub.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=\ngithub.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=\ngithub.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.2.0/go.mod 
h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=\ngithub.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=\ngithub.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=\ngithub.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=\ngithub.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=\ngithub.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=\ngithub.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=\ngithub.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=\ngithub.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=\ngithub.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=\ngithub.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=\ngithub.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=\ngithub.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=\ngithub.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=\ngithub.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=\ngithub.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=\ngithub.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=\ngithub.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod 
h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=\ngolang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=\ngolang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=\ngolang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=\ngolang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=\ngolang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=\ngolang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngoogle.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=\ngoogle.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=\ngoogle.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=\ngoogle.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=\ngoogle.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=\ngoogle.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=\ngoogle.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=\ngopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=\ngopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\n"
  },
  {
    "path": "inmem_snapshot.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"io\"\n\t\"sync\"\n)\n\n// InmemSnapshotStore implements the SnapshotStore interface and\n// retains only the most recent snapshot\ntype InmemSnapshotStore struct {\n\tlatest      *InmemSnapshotSink\n\thasSnapshot bool\n\tsync.RWMutex\n}\n\n// InmemSnapshotSink implements SnapshotSink in memory\ntype InmemSnapshotSink struct {\n\tmeta     SnapshotMeta\n\tcontents *bytes.Buffer\n}\n\n// NewInmemSnapshotStore creates a blank new InmemSnapshotStore\nfunc NewInmemSnapshotStore() *InmemSnapshotStore {\n\treturn &InmemSnapshotStore{\n\t\tlatest: &InmemSnapshotSink{\n\t\t\tcontents: &bytes.Buffer{},\n\t\t},\n\t}\n}\n\n// Create replaces the stored snapshot with a new one using the given args\nfunc (m *InmemSnapshotStore) Create(version SnapshotVersion, index, term uint64,\n\tconfiguration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {\n\t// We only support version 1 snapshots at this time.\n\tif version != 1 {\n\t\treturn nil, fmt.Errorf(\"unsupported snapshot version %d\", version)\n\t}\n\n\tname := snapshotName(term, index)\n\n\tm.Lock()\n\tdefer m.Unlock()\n\n\tsink := &InmemSnapshotSink{\n\t\tmeta: SnapshotMeta{\n\t\t\tVersion:            version,\n\t\t\tID:                 name,\n\t\t\tIndex:              index,\n\t\t\tTerm:               term,\n\t\t\tPeers:              encodePeers(configuration, trans),\n\t\t\tConfiguration:      configuration,\n\t\t\tConfigurationIndex: configurationIndex,\n\t\t},\n\t\tcontents: &bytes.Buffer{},\n\t}\n\tm.hasSnapshot = true\n\tm.latest = sink\n\n\treturn sink, nil\n}\n\n// List returns the latest snapshot taken\nfunc (m *InmemSnapshotStore) List() ([]*SnapshotMeta, error) {\n\tm.RLock()\n\tdefer m.RUnlock()\n\n\tif !m.hasSnapshot {\n\t\treturn []*SnapshotMeta{}, nil\n\t}\n\treturn []*SnapshotMeta{&m.latest.meta}, nil\n}\n\n// Open wraps an io.ReadCloser 
around the snapshot contents\nfunc (m *InmemSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {\n\tm.RLock()\n\tdefer m.RUnlock()\n\n\tif m.latest.meta.ID != id {\n\t\treturn nil, nil, fmt.Errorf(\"[ERR] snapshot: failed to open snapshot id: %s\", id)\n\t}\n\n\t// Make a copy of the contents, since a bytes.Buffer can only be read\n\t// once.\n\tcontents := bytes.NewBuffer(m.latest.contents.Bytes())\n\treturn &m.latest.meta, io.NopCloser(contents), nil\n}\n\n// Write appends the given bytes to the snapshot contents\nfunc (s *InmemSnapshotSink) Write(p []byte) (n int, err error) {\n\twritten, err := s.contents.Write(p)\n\ts.meta.Size += int64(written)\n\treturn written, err\n}\n\n// Close is a no-op; the Size is already kept up to date by Write\nfunc (s *InmemSnapshotSink) Close() error {\n\treturn nil\n}\n\n// ID returns the ID of the SnapshotMeta\nfunc (s *InmemSnapshotSink) ID() string {\n\treturn s.meta.ID\n}\n\n// Cancel returns successfully with a nil error\nfunc (s *InmemSnapshotSink) Cancel() error {\n\treturn nil\n}\n"
  },
  {
    "path": "inmem_snapshot_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"io\"\n\t\"reflect\"\n\t\"testing\"\n)\n\nfunc TestInmemSnapshotStoreImpl(t *testing.T) {\n\tvar impl interface{} = &InmemSnapshotStore{}\n\tif _, ok := impl.(SnapshotStore); !ok {\n\t\tt.Fatalf(\"InmemSnapshotStore not a SnapshotStore\")\n\t}\n}\n\nfunc TestInmemSnapshotSinkImpl(t *testing.T) {\n\tvar impl interface{} = &InmemSnapshotSink{}\n\tif _, ok := impl.(SnapshotSink); !ok {\n\t\tt.Fatalf(\"InmemSnapshotSink not a SnapshotSink\")\n\t}\n}\n\nfunc TestInmemSS_CreateSnapshot(t *testing.T) {\n\tsnap := NewInmemSnapshotStore()\n\n\t// Check no snapshots\n\tsnaps, err := snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 0 {\n\t\tt.Fatalf(\"did not expect any snapshots: %v\", snaps)\n\t}\n\n\t// Create a new sink\n\tvar configuration Configuration\n\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\tSuffrage: Voter,\n\t\tID:       ServerID(\"my id\"),\n\t\tAddress:  ServerAddress(\"over here\"),\n\t})\n\t_, trans := NewInmemTransport(NewInmemAddr())\n\tsink, err := snap.Create(SnapshotVersionMax, 10, 3, configuration, 2, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// The sink is not done, should not be in a list!\n\tsnaps, err = snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 1 {\n\t\tt.Fatalf(\"should always be 1 snapshot: %v\", snaps)\n\t}\n\n\t// Write to the sink\n\t_, err = sink.Write([]byte(\"first\\n\"))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\t_, err = sink.Write([]byte(\"second\\n\"))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Done!\n\terr = sink.Close()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Should have a snapshot!\n\tsnaps, err = snap.List()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif len(snaps) != 1 {\n\t\tt.Fatalf(\"expect a 
snapshots: %v\", snaps)\n\t}\n\n\t// Check the latest\n\tlatest := snaps[0]\n\tif latest.Index != 10 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif latest.Term != 3 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif !reflect.DeepEqual(latest.Configuration, configuration) {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif latest.ConfigurationIndex != 2 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\tif latest.Size != 13 {\n\t\tt.Fatalf(\"bad snapshot: %v\", *latest)\n\t}\n\n\t// Read the snapshot\n\t_, r, err := snap.Open(latest.ID)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Read out everything\n\tvar buf bytes.Buffer\n\tif _, err := io.Copy(&buf, r); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif err := r.Close(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure a match\n\tif !bytes.Equal(buf.Bytes(), []byte(\"first\\nsecond\\n\")) {\n\t\tt.Fatalf(\"content mismatch\")\n\t}\n}\n\nfunc TestInmemSS_OpenSnapshotTwice(t *testing.T) {\n\tsnap := NewInmemSnapshotStore()\n\n\t// Create a new sink\n\tvar configuration Configuration\n\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\tSuffrage: Voter,\n\t\tID:       ServerID(\"my id\"),\n\t\tAddress:  ServerAddress(\"over here\"),\n\t})\n\t_, trans := NewInmemTransport(NewInmemAddr())\n\tsink, err := snap.Create(SnapshotVersionMax, 10, 3, configuration, 2, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Write to the sink\n\t_, err = sink.Write([]byte(\"data\\n\"))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\terr = sink.Close()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Read the snapshot a first time\n\t_, r, err := snap.Open(sink.ID())\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Read out everything\n\tvar buf1 bytes.Buffer\n\tif _, err = io.Copy(&buf1, r); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif err = r.Close(); err != nil 
{\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure a match\n\tif !bytes.Equal(buf1.Bytes(), []byte(\"data\\n\")) {\n\t\tt.Fatalf(\"content mismatch\")\n\t}\n\n\t// Read the snapshot a second time.\n\t_, r, err = snap.Open(sink.ID())\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Read out everything again\n\tvar buf2 bytes.Buffer\n\tif _, err := io.Copy(&buf2, r); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif err := r.Close(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure it's still the same content\n\tif !bytes.Equal(buf2.Bytes(), []byte(\"data\\n\")) {\n\t\tt.Fatalf(\"content mismatch\")\n\t}\n}\n"
  },
  {
    "path": "inmem_store.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"errors\"\n\t\"sync\"\n)\n\n// InmemStore implements the LogStore and StableStore interface.\n// It should NOT EVER be used for production. It is used only for\n// unit tests. Use the MDBStore implementation instead.\ntype InmemStore struct {\n\tl         sync.RWMutex\n\tlowIndex  uint64\n\thighIndex uint64\n\tlogs      map[uint64]*Log\n\tkv        map[string][]byte\n\tkvInt     map[string]uint64\n}\n\n// NewInmemStore returns a new in-memory backend. Do not ever\n// use for production. Only for testing.\nfunc NewInmemStore() *InmemStore {\n\ti := &InmemStore{\n\t\tlogs:  make(map[uint64]*Log),\n\t\tkv:    make(map[string][]byte),\n\t\tkvInt: make(map[string]uint64),\n\t}\n\treturn i\n}\n\n// FirstIndex implements the LogStore interface.\nfunc (i *InmemStore) FirstIndex() (uint64, error) {\n\ti.l.RLock()\n\tdefer i.l.RUnlock()\n\treturn i.lowIndex, nil\n}\n\n// LastIndex implements the LogStore interface.\nfunc (i *InmemStore) LastIndex() (uint64, error) {\n\ti.l.RLock()\n\tdefer i.l.RUnlock()\n\treturn i.highIndex, nil\n}\n\n// GetLog implements the LogStore interface.\nfunc (i *InmemStore) GetLog(index uint64, log *Log) error {\n\ti.l.RLock()\n\tdefer i.l.RUnlock()\n\tl, ok := i.logs[index]\n\tif !ok {\n\t\treturn ErrLogNotFound\n\t}\n\t*log = *l\n\treturn nil\n}\n\n// StoreLog implements the LogStore interface.\nfunc (i *InmemStore) StoreLog(log *Log) error {\n\treturn i.StoreLogs([]*Log{log})\n}\n\n// StoreLogs implements the LogStore interface.\nfunc (i *InmemStore) StoreLogs(logs []*Log) error {\n\ti.l.Lock()\n\tdefer i.l.Unlock()\n\tfor _, l := range logs {\n\t\ti.logs[l.Index] = l\n\t\tif i.lowIndex == 0 {\n\t\t\ti.lowIndex = l.Index\n\t\t}\n\t\tif l.Index > i.highIndex {\n\t\t\ti.highIndex = l.Index\n\t\t}\n\t}\n\treturn nil\n}\n\n// DeleteRange implements the LogStore interface.\nfunc (i *InmemStore) DeleteRange(min, max uint64) error 
{\n\ti.l.Lock()\n\tdefer i.l.Unlock()\n\tfor j := min; j <= max; j++ {\n\t\tdelete(i.logs, j)\n\t}\n\tif min <= i.lowIndex {\n\t\ti.lowIndex = max + 1\n\t}\n\tif max >= i.highIndex {\n\t\ti.highIndex = min - 1\n\t}\n\tif i.lowIndex > i.highIndex {\n\t\ti.lowIndex = 0\n\t\ti.highIndex = 0\n\t}\n\treturn nil\n}\n\n// Set implements the StableStore interface.\nfunc (i *InmemStore) Set(key []byte, val []byte) error {\n\ti.l.Lock()\n\tdefer i.l.Unlock()\n\ti.kv[string(key)] = val\n\treturn nil\n}\n\n// Get implements the StableStore interface.\nfunc (i *InmemStore) Get(key []byte) ([]byte, error) {\n\ti.l.RLock()\n\tdefer i.l.RUnlock()\n\tval := i.kv[string(key)]\n\tif val == nil {\n\t\treturn nil, errors.New(\"not found\")\n\t}\n\treturn val, nil\n}\n\n// SetUint64 implements the StableStore interface.\nfunc (i *InmemStore) SetUint64(key []byte, val uint64) error {\n\ti.l.Lock()\n\tdefer i.l.Unlock()\n\ti.kvInt[string(key)] = val\n\treturn nil\n}\n\n// GetUint64 implements the StableStore interface.\nfunc (i *InmemStore) GetUint64(key []byte) (uint64, error) {\n\ti.l.RLock()\n\tdefer i.l.RUnlock()\n\treturn i.kvInt[string(key)], nil\n}\n"
  },
  {
    "path": "inmem_transport.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"io\"\n\t\"sync\"\n\t\"time\"\n)\n\n// NewInmemAddr returns a new in-memory addr with\n// a randomly generate UUID as the ID.\nfunc NewInmemAddr() ServerAddress {\n\treturn ServerAddress(generateUUID())\n}\n\n// inmemPipeline is used to pipeline requests for the in-mem transport.\ntype inmemPipeline struct {\n\ttrans    *InmemTransport\n\tpeer     *InmemTransport\n\tpeerAddr ServerAddress\n\n\tdoneCh       chan AppendFuture\n\tinprogressCh chan *inmemPipelineInflight\n\n\tshutdown     bool\n\tshutdownCh   chan struct{}\n\tshutdownLock sync.RWMutex\n}\n\ntype inmemPipelineInflight struct {\n\tfuture *appendFuture\n\trespCh <-chan RPCResponse\n}\n\n// InmemTransport Implements the Transport interface, to allow Raft to be\n// tested in-memory without going over a network.\ntype InmemTransport struct {\n\tsync.RWMutex\n\tconsumerCh chan RPC\n\tlocalAddr  ServerAddress\n\tpeers      map[ServerAddress]*InmemTransport\n\tpipelines  []*inmemPipeline\n\ttimeout    time.Duration\n}\n\n// NewInmemTransportWithTimeout is used to initialize a new transport and\n// generates a random local address if none is specified. The given timeout\n// will be used to decide how long to wait for a connected peer to process the\n// RPCs that we're sending it. 
See also Connect() and Consumer().\nfunc NewInmemTransportWithTimeout(addr ServerAddress, timeout time.Duration) (ServerAddress, *InmemTransport) {\n\tif string(addr) == \"\" {\n\t\taddr = NewInmemAddr()\n\t}\n\ttrans := &InmemTransport{\n\t\tconsumerCh: make(chan RPC, 16),\n\t\tlocalAddr:  addr,\n\t\tpeers:      make(map[ServerAddress]*InmemTransport),\n\t\ttimeout:    timeout,\n\t}\n\treturn addr, trans\n}\n\n// NewInmemTransport is used to initialize a new transport\n// and generates a random local address if none is specified\nfunc NewInmemTransport(addr ServerAddress) (ServerAddress, *InmemTransport) {\n\treturn NewInmemTransportWithTimeout(addr, 500*time.Millisecond)\n}\n\n// SetHeartbeatHandler is used to set optional fast-path for\n// heartbeats, not supported for this transport.\nfunc (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {\n}\n\n// Consumer implements the Transport interface.\nfunc (i *InmemTransport) Consumer() <-chan RPC {\n\treturn i.consumerCh\n}\n\n// LocalAddr implements the Transport interface.\nfunc (i *InmemTransport) LocalAddr() ServerAddress {\n\treturn i.localAddr\n}\n\n// AppendEntriesPipeline returns an interface that can be used to pipeline\n// AppendEntries requests.\nfunc (i *InmemTransport) AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) {\n\ti.Lock()\n\tdefer i.Unlock()\n\n\tpeer, ok := i.peers[target]\n\tif !ok {\n\t\treturn nil, fmt.Errorf(\"failed to connect to peer: %v\", target)\n\t}\n\tpipeline := newInmemPipeline(i, peer, target)\n\ti.pipelines = append(i.pipelines, pipeline)\n\treturn pipeline, nil\n}\n\n// AppendEntries implements the Transport interface.\nfunc (i *InmemTransport) AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {\n\trpcResp, err := i.makeRPC(target, args, nil, i.timeout)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// Copy the result back\n\tout := rpcResp.Response.(*AppendEntriesResponse)\n\t*resp = 
*out\n\treturn nil\n}\n\n// RequestVote implements the Transport interface.\nfunc (i *InmemTransport) RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error {\n\trpcResp, err := i.makeRPC(target, args, nil, i.timeout)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// Copy the result back\n\tout := rpcResp.Response.(*RequestVoteResponse)\n\t*resp = *out\n\treturn nil\n}\n\nfunc (i *InmemTransport) RequestPreVote(id ServerID, target ServerAddress, args *RequestPreVoteRequest, resp *RequestPreVoteResponse) error {\n\trpcResp, err := i.makeRPC(target, args, nil, i.timeout)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// Copy the result back\n\tout := rpcResp.Response.(*RequestPreVoteResponse)\n\t*resp = *out\n\treturn nil\n}\n\n// InstallSnapshot implements the Transport interface.\nfunc (i *InmemTransport) InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {\n\trpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// Copy the result back\n\tout := rpcResp.Response.(*InstallSnapshotResponse)\n\t*resp = *out\n\treturn nil\n}\n\n// TimeoutNow implements the Transport interface.\nfunc (i *InmemTransport) TimeoutNow(id ServerID, target ServerAddress, args *TimeoutNowRequest, resp *TimeoutNowResponse) error {\n\trpcResp, err := i.makeRPC(target, args, nil, 10*i.timeout)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// Copy the result back\n\tout := rpcResp.Response.(*TimeoutNowResponse)\n\t*resp = *out\n\treturn nil\n}\n\nfunc (i *InmemTransport) makeRPC(target ServerAddress, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {\n\ti.RLock()\n\tpeer, ok := i.peers[target]\n\ti.RUnlock()\n\n\tif !ok {\n\t\terr = fmt.Errorf(\"failed to connect to peer: %v\", target)\n\t\treturn\n\t}\n\n\t// Send the RPC over\n\trespCh := make(chan RPCResponse, 1)\n\treq := 
RPC{\n\t\tCommand:  args,\n\t\tReader:   r,\n\t\tRespChan: respCh,\n\t}\n\tselect {\n\tcase peer.consumerCh <- req:\n\tcase <-time.After(timeout):\n\t\terr = fmt.Errorf(\"send timed out\")\n\t\treturn\n\t}\n\n\t// Wait for a response\n\tselect {\n\tcase rpcResp = <-respCh:\n\t\tif rpcResp.Error != nil {\n\t\t\terr = rpcResp.Error\n\t\t}\n\tcase <-time.After(timeout):\n\t\terr = fmt.Errorf(\"command timed out\")\n\t}\n\treturn\n}\n\n// EncodePeer implements the Transport interface.\nfunc (i *InmemTransport) EncodePeer(id ServerID, p ServerAddress) []byte {\n\treturn []byte(p)\n}\n\n// DecodePeer implements the Transport interface.\nfunc (i *InmemTransport) DecodePeer(buf []byte) ServerAddress {\n\treturn ServerAddress(buf)\n}\n\n// Connect is used to connect this transport to another transport for\n// a given peer name. This allows for local routing.\nfunc (i *InmemTransport) Connect(peer ServerAddress, t Transport) {\n\ttrans := t.(*InmemTransport)\n\ti.Lock()\n\tdefer i.Unlock()\n\ti.peers[peer] = trans\n}\n\n// Disconnect is used to remove the ability to route to a given peer.\nfunc (i *InmemTransport) Disconnect(peer ServerAddress) {\n\ti.Lock()\n\tdefer i.Unlock()\n\tdelete(i.peers, peer)\n\n\t// Disconnect any pipelines\n\tn := len(i.pipelines)\n\tfor idx := 0; idx < n; idx++ {\n\t\tif i.pipelines[idx].peerAddr == peer {\n\t\t\t_ = i.pipelines[idx].Close()\n\t\t\ti.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil\n\t\t\tidx--\n\t\t\tn--\n\t\t}\n\t}\n\ti.pipelines = i.pipelines[:n]\n}\n\n// DisconnectAll is used to remove all routes to peers.\nfunc (i *InmemTransport) DisconnectAll() {\n\ti.Lock()\n\tdefer i.Unlock()\n\ti.peers = make(map[ServerAddress]*InmemTransport)\n\n\t// Handle pipelines\n\tfor _, pipeline := range i.pipelines {\n\t\t_ = pipeline.Close()\n\t}\n\ti.pipelines = nil\n}\n\n// Close is used to permanently disable the transport\nfunc (i *InmemTransport) Close() error {\n\ti.DisconnectAll()\n\treturn nil\n}\n\nfunc newInmemPipeline(trans 
*InmemTransport, peer *InmemTransport, addr ServerAddress) *inmemPipeline {\n\ti := &inmemPipeline{\n\t\ttrans:        trans,\n\t\tpeer:         peer,\n\t\tpeerAddr:     addr,\n\t\tdoneCh:       make(chan AppendFuture, 16),\n\t\tinprogressCh: make(chan *inmemPipelineInflight, 16),\n\t\tshutdownCh:   make(chan struct{}),\n\t}\n\tgo i.decodeResponses()\n\treturn i\n}\n\nfunc (i *inmemPipeline) decodeResponses() {\n\ttimeout := i.trans.timeout\n\tfor {\n\t\tselect {\n\t\tcase inp := <-i.inprogressCh:\n\t\t\tvar timeoutCh <-chan time.Time\n\t\t\tif timeout > 0 {\n\t\t\t\ttimeoutCh = time.After(timeout)\n\t\t\t}\n\n\t\t\tselect {\n\t\t\tcase rpcResp := <-inp.respCh:\n\t\t\t\t// Copy the result back\n\t\t\t\t*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)\n\t\t\t\tinp.future.respond(rpcResp.Error)\n\n\t\t\t\tselect {\n\t\t\t\tcase i.doneCh <- inp.future:\n\t\t\t\tcase <-i.shutdownCh:\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\tcase <-timeoutCh:\n\t\t\t\tinp.future.respond(fmt.Errorf(\"command timed out\"))\n\t\t\t\tselect {\n\t\t\t\tcase i.doneCh <- inp.future:\n\t\t\t\tcase <-i.shutdownCh:\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\tcase <-i.shutdownCh:\n\t\t\t\treturn\n\t\t\t}\n\t\tcase <-i.shutdownCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\nfunc (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {\n\t// Create a new future\n\tfuture := &appendFuture{\n\t\tstart: time.Now(),\n\t\targs:  args,\n\t\tresp:  resp,\n\t}\n\tfuture.init()\n\n\t// Handle a timeout\n\tvar timeout <-chan time.Time\n\tif i.trans.timeout > 0 {\n\t\ttimeout = time.After(i.trans.timeout)\n\t}\n\n\t// Send the RPC over\n\trespCh := make(chan RPCResponse, 1)\n\trpc := RPC{\n\t\tCommand:  args,\n\t\tRespChan: respCh,\n\t}\n\n\t// Check if we have been already shutdown, otherwise the random choose\n\t// made by select statement below might pick consumerCh even if\n\t// shutdownCh was closed.\n\ti.shutdownLock.RLock()\n\tshutdown := 
i.shutdown\n\ti.shutdownLock.RUnlock()\n\tif shutdown {\n\t\treturn nil, ErrPipelineShutdown\n\t}\n\n\tselect {\n\tcase i.peer.consumerCh <- rpc:\n\tcase <-timeout:\n\t\treturn nil, fmt.Errorf(\"command enqueue timeout\")\n\tcase <-i.shutdownCh:\n\t\treturn nil, ErrPipelineShutdown\n\t}\n\n\t// Send to be decoded\n\tselect {\n\tcase i.inprogressCh <- &inmemPipelineInflight{future, respCh}:\n\t\treturn future, nil\n\tcase <-i.shutdownCh:\n\t\treturn nil, ErrPipelineShutdown\n\t}\n}\n\nfunc (i *inmemPipeline) Consumer() <-chan AppendFuture {\n\treturn i.doneCh\n}\n\nfunc (i *inmemPipeline) Close() error {\n\ti.shutdownLock.Lock()\n\tdefer i.shutdownLock.Unlock()\n\tif i.shutdown {\n\t\treturn nil\n\t}\n\n\ti.shutdown = true\n\tclose(i.shutdownCh)\n\treturn nil\n}\n"
  },
  {
    "path": "inmem_transport_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n)\n\nfunc TestInmemTransportImpl(t *testing.T) {\n\tvar inm interface{} = &InmemTransport{}\n\tif _, ok := inm.(Transport); !ok {\n\t\tt.Fatalf(\"InmemTransport is not a Transport\")\n\t}\n\tif _, ok := inm.(LoopbackTransport); !ok {\n\t\tt.Fatalf(\"InmemTransport is not a Loopback Transport\")\n\t}\n\tif _, ok := inm.(WithPeers); !ok {\n\t\tt.Fatalf(\"InmemTransport is not a WithPeers Transport\")\n\t}\n}\n\nfunc TestInmemTransportWriteTimeout(t *testing.T) {\n\t// InmemTransport should timeout if the other end has gone away\n\t// when it tries to send a request.\n\t// Use unbuffered channels so that we can see the write failing\n\t// without having to contrive to fill up the buffer first.\n\ttimeout := 10 * time.Millisecond\n\tt1 := &InmemTransport{\n\t\tconsumerCh: make(chan RPC),\n\t\tlocalAddr:  NewInmemAddr(),\n\t\tpeers:      make(map[ServerAddress]*InmemTransport),\n\t\ttimeout:    timeout,\n\t}\n\tt2 := &InmemTransport{\n\t\tconsumerCh: make(chan RPC),\n\t\tlocalAddr:  NewInmemAddr(),\n\t\tpeers:      make(map[ServerAddress]*InmemTransport),\n\t\ttimeout:    timeout,\n\t}\n\ta2 := t2.LocalAddr()\n\tt1.Connect(a2, t2)\n\n\tstop := make(chan struct{})\n\tstopped := make(chan struct{})\n\tgo func() {\n\t\tdefer close(stopped)\n\t\tvar i uint64\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-stop:\n\t\t\t\treturn\n\t\t\tcase rpc := <-t2.Consumer():\n\t\t\t\ti++\n\t\t\t\trpc.Respond(&AppendEntriesResponse{\n\t\t\t\t\tSuccess: true,\n\t\t\t\t\tLastLog: i,\n\t\t\t\t}, nil)\n\t\t\t}\n\t\t}\n\t}()\n\n\tvar resp AppendEntriesResponse\n\t// Sanity check that sending is working before stopping the\n\t// responder.\n\terr := t1.AppendEntries(\"server1\", a2, &AppendEntriesRequest{}, &resp)\n\tNoErr(err, t)\n\trequire.True(t, resp.LastLog == 1)\n\n\tclose(stop)\n\tselect {\n\tcase 
<-stopped:\n\tcase <-time.After(time.Second):\n\t\tt.Fatalf(\"timed out waiting for responder to stop\")\n\t}\n\n\terr = t1.AppendEntries(\"server1\", a2, &AppendEntriesRequest{}, &resp)\n\tif err == nil {\n\t\tt.Fatalf(\"expected AppendEntries to time out\")\n\t}\n\tif err.Error() != \"send timed out\" {\n\t\tt.Fatalf(\"unexpected error: %v\", err)\n\t}\n}\n"
  },
  {
    "path": "integ_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"fmt\"\n\t\"os\"\n\t\"sync/atomic\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\t\"github.com/stretchr/testify/require\"\n)\n\n// CheckInteg will skip a test if integration testing is not enabled.\nfunc CheckInteg(t *testing.T) {\n\tif !IsInteg() {\n\t\tt.SkipNow()\n\t}\n}\n\n// IsInteg returns a boolean telling you if we're in integ testing mode.\nfunc IsInteg() bool {\n\treturn os.Getenv(\"INTEG_TESTS\") != \"\"\n}\n\ntype RaftEnv struct {\n\tdir      string\n\tconf     *Config\n\tfsm      *MockFSM\n\tstore    *InmemStore\n\tsnapshot *FileSnapshotStore\n\ttrans    *NetworkTransport\n\traft     *Raft\n\tlogger   hclog.Logger\n}\n\n// Release shuts down and cleans up any stored data, its not restartable after this\nfunc (r *RaftEnv) Release() {\n\tr.Shutdown()\n\t_ = os.RemoveAll(r.dir)\n}\n\n// Shutdown shuts down raft & transport, but keeps track of its data, its restartable\n// after a Shutdown() by calling Start()\nfunc (r *RaftEnv) Shutdown() {\n\tr.logger.Warn(fmt.Sprintf(\"Shutdown node at %v\", r.raft.localAddr))\n\tf := r.raft.Shutdown()\n\tif err := f.Error(); err != nil {\n\t\tpanic(err)\n\t}\n\t_ = r.trans.Close()\n}\n\n// Restart will start a raft node that was previously Shutdown()\nfunc (r *RaftEnv) Restart(t *testing.T) {\n\ttrans, err := NewTCPTransport(string(r.raft.localAddr), nil, 2, time.Second, nil)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tr.trans = trans\n\tr.logger.Info(\"starting node\", \"addr\", trans.LocalAddr())\n\traft, err := NewRaft(r.conf, r.fsm, r.store, r.store, r.snapshot, r.trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tr.raft = raft\n}\n\nfunc MakeRaft(tb testing.TB, conf *Config, bootstrap bool) *RaftEnv {\n\t// Set the config\n\tif conf == nil {\n\t\tconf = inmemConfig(tb)\n\t}\n\n\tdir, err := os.MkdirTemp(\"\", 
\"raft\")\n\tif err != nil {\n\t\ttb.Fatalf(\"err: %v \", err)\n\t}\n\n\tstable := NewInmemStore()\n\n\tsnap, err := NewFileSnapshotStore(dir, 3, nil)\n\tif err != nil {\n\t\ttb.Fatalf(\"err: %v\", err)\n\t}\n\n\tenv := &RaftEnv{\n\t\tconf:     conf,\n\t\tdir:      dir,\n\t\tstore:    stable,\n\t\tsnapshot: snap,\n\t\tfsm:      &MockFSM{},\n\t}\n\ttrans, err := NewTCPTransport(\"localhost:0\", nil, 2, time.Second, nil)\n\tif err != nil {\n\t\ttb.Fatalf(\"err: %v\", err)\n\t}\n\n\tenv.logger = hclog.New(&hclog.LoggerOptions{\n\t\tName: string(trans.LocalAddr()) + \" :\",\n\t})\n\tenv.trans = trans\n\n\tif bootstrap {\n\t\tvar configuration Configuration\n\t\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       conf.LocalID,\n\t\t\tAddress:  trans.LocalAddr(),\n\t\t})\n\t\terr = BootstrapCluster(conf, stable, stable, snap, trans, configuration)\n\t\tif err != nil {\n\t\t\ttb.Fatalf(\"err: %v\", err)\n\t\t}\n\t}\n\tenv.logger.Info(\"starting node\", \"addr\", trans.LocalAddr())\n\tconf.Logger = env.logger\n\traft, err := NewRaft(conf, env.fsm, stable, stable, snap, trans)\n\tif err != nil {\n\t\ttb.Fatalf(\"err: %v\", err)\n\t}\n\tenv.raft = raft\n\treturn env\n}\n\nfunc WaitFor(env *RaftEnv, state RaftState) error {\n\tlimit := time.Now().Add(200 * time.Millisecond)\n\tfor env.raft.State() != state {\n\t\tif time.Now().Before(limit) {\n\t\t\ttime.Sleep(10 * time.Millisecond)\n\t\t} else {\n\t\t\treturn fmt.Errorf(\"failed to transition to state %v\", state)\n\t\t}\n\t}\n\treturn nil\n}\n\nfunc WaitForAny(state RaftState, envs []*RaftEnv) (*RaftEnv, error) {\n\tlimit := time.Now().Add(200 * time.Millisecond)\nCHECK:\n\tfor _, env := range envs {\n\t\tif env.raft.State() == state {\n\t\t\treturn env, nil\n\t\t}\n\t}\n\tif time.Now().Before(limit) {\n\t\tgoto WAIT\n\t}\n\treturn nil, fmt.Errorf(\"failed to find node in %v state\", state)\nWAIT:\n\ttime.Sleep(10 * time.Millisecond)\n\tgoto CHECK\n}\n\nfunc WaitFuture(f 
Future) error {\n\ttimer := time.AfterFunc(1000*time.Millisecond, func() {\n\t\tpanic(fmt.Errorf(\"timeout waiting for future %v\", f))\n\t})\n\tdefer timer.Stop()\n\treturn f.Error()\n}\n\nfunc NoErr(err error, tb testing.TB) {\n\ttb.Helper()\n\tif err != nil {\n\t\ttb.Fatalf(\"err: %v\", err)\n\t}\n}\n\nfunc CheckConsistent(envs []*RaftEnv, t *testing.T) {\n\tlimit := time.Now().Add(400 * time.Millisecond)\n\tfirst := envs[0]\n\tfirst.fsm.Lock()\n\tdefer first.fsm.Unlock()\n\tvar err error\nCHECK:\n\tl1 := len(first.fsm.logs)\n\tfor i := 1; i < len(envs); i++ {\n\t\tenv := envs[i]\n\t\tenv.fsm.Lock()\n\t\tl2 := len(env.fsm.logs)\n\t\tif l1 != l2 {\n\t\t\terr = fmt.Errorf(\"log length mismatch %d %d\", l1, l2)\n\t\t\tenv.fsm.Unlock()\n\t\t\tgoto ERR\n\t\t}\n\t\tfor idx, log := range first.fsm.logs {\n\t\t\tother := env.fsm.logs[idx]\n\t\t\tif !bytes.Equal(log, other) {\n\t\t\t\terr = fmt.Errorf(\"log entry %d mismatch between %s/%s : '%s' / '%s'\", idx, first.raft.localAddr, env.raft.localAddr, log, other)\n\t\t\t\tenv.fsm.Unlock()\n\t\t\t\tgoto ERR\n\t\t\t}\n\t\t}\n\t\tenv.fsm.Unlock()\n\t}\n\treturn\nERR:\n\tif time.Now().After(limit) {\n\t\tt.Fatalf(\"%v\", err)\n\t}\n\tfirst.fsm.Unlock()\n\ttime.Sleep(20 * time.Millisecond)\n\tfirst.fsm.Lock()\n\tgoto CHECK\n}\n\n// return a log entry that's at least sz long that has the prefix 'test i '\nfunc logBytes(i, sz int) []byte {\n\tvar logBuffer bytes.Buffer\n\tfmt.Fprintf(&logBuffer, \"test %d \", i)\n\tfor logBuffer.Len() < sz {\n\t\tlogBuffer.WriteByte('x')\n\t}\n\treturn logBuffer.Bytes()\n}\n\n// Tests Raft by creating a cluster, growing it to 5 nodes while\n// causing various stressful conditions\nfunc TestRaft_Integ(t *testing.T) {\n\tCheckInteg(t)\n\tconf := DefaultConfig()\n\tconf.LocalID = ServerID(\"first\")\n\tconf.HeartbeatTimeout = 50 * time.Millisecond\n\tconf.ElectionTimeout = 50 * time.Millisecond\n\tconf.LeaderLeaseTimeout = 50 * time.Millisecond\n\tconf.CommitTimeout = 5 * 
time.Millisecond\n\tconf.SnapshotThreshold = 100\n\tconf.TrailingLogs = 10\n\n\t// Create a single node\n\tenv1 := MakeRaft(t, conf, true)\n\tNoErr(WaitFor(env1, Leader), t)\n\n\ttotalApplied := 0\n\tapplyAndWait := func(leader *RaftEnv, n, sz int) {\n\t\t// Do some commits\n\t\tvar futures []ApplyFuture\n\t\tfor i := 0; i < n; i++ {\n\t\t\tfutures = append(futures, leader.raft.Apply(logBytes(i, sz), 0))\n\t\t}\n\t\tfor _, f := range futures {\n\t\t\tNoErr(WaitFuture(f), t)\n\t\t\tleader.logger.Debug(\"applied\", \"index\", f.Index(), \"size\", sz)\n\t\t}\n\t\ttotalApplied += n\n\t}\n\t// Do some commits\n\tapplyAndWait(env1, 100, 10)\n\n\t// Do a snapshot\n\tNoErr(WaitFuture(env1.raft.Snapshot()), t)\n\n\t// Join a few nodes!\n\tvar envs []*RaftEnv\n\tfor i := 0; i < 4; i++ {\n\t\tconf.LocalID = ServerID(fmt.Sprintf(\"next-batch-%d\", i))\n\t\tenv := MakeRaft(t, conf, false)\n\t\taddr := env.trans.LocalAddr()\n\t\tNoErr(WaitFuture(env1.raft.AddVoter(conf.LocalID, addr, 0, 0)), t)\n\t\tenvs = append(envs, env)\n\t}\n\n\t// Wait for a leader\n\tleader, err := WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))\n\tNoErr(err, t)\n\n\t// Do some more commits\n\tapplyAndWait(leader, 100, 10)\n\n\t// Snapshot the leader\n\tNoErr(WaitFuture(leader.raft.Snapshot()), t)\n\n\tCheckConsistent(append([]*RaftEnv{env1}, envs...), t)\n\n\t// shutdown a follower\n\tdisconnected := envs[len(envs)-1]\n\tdisconnected.Shutdown()\n\n\t// Do some more commits [make sure the resulting snapshot will be a reasonable size]\n\tapplyAndWait(leader, 100, 10000)\n\n\t// snapshot the leader [leaders log should be compacted past the disconnected follower log now]\n\tNoErr(WaitFuture(leader.raft.Snapshot()), t)\n\n\t// Unfortunately we need to wait for the leader to start backing off RPCs to the down follower\n\t// such that when the follower comes back up it'll run an election before it gets an rpc from\n\t// the leader\n\ttime.Sleep(time.Second * 5)\n\n\t// start the now out of date follower 
back up\n\tdisconnected.Restart(t)\n\n\t// wait for it to get caught up\n\ttimeout := time.Now().Add(time.Second * 10)\n\tfor disconnected.raft.getLastApplied() < leader.raft.getLastApplied() {\n\t\ttime.Sleep(time.Millisecond)\n\t\tif time.Now().After(timeout) {\n\t\t\tt.Fatalf(\"Gave up waiting for follower to get caught up to leader\")\n\t\t}\n\t}\n\n\tCheckConsistent(append([]*RaftEnv{env1}, envs...), t)\n\n\t// Shoot two nodes in the head!\n\trm1, rm2 := envs[0], envs[1]\n\trm1.Release()\n\trm2.Release()\n\tenvs = envs[2:]\n\ttime.Sleep(10 * time.Millisecond)\n\n\t// Wait for a leader\n\tleader, err = WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))\n\tNoErr(err, t)\n\n\t// Do some more commits\n\tapplyAndWait(leader, 100, 10)\n\n\t// Join a few new nodes!\n\tfor i := 0; i < 2; i++ {\n\t\tconf.LocalID = ServerID(fmt.Sprintf(\"final-batch-%d\", i))\n\t\tenv := MakeRaft(t, conf, false)\n\t\taddr := env.trans.LocalAddr()\n\t\tNoErr(WaitFuture(leader.raft.AddVoter(conf.LocalID, addr, 0, 0)), t)\n\t\tenvs = append(envs, env)\n\n\t\tleader, err = WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))\n\t\tNoErr(err, t)\n\t}\n\n\t// Wait for a leader\n\tleader, err = WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))\n\tNoErr(err, t)\n\n\t// Remove the old nodes\n\tNoErr(WaitFuture(leader.raft.RemoveServer(rm1.raft.localID, 0, 0)), t)\n\tNoErr(WaitFuture(leader.raft.RemoveServer(rm2.raft.localID, 0, 0)), t)\n\n\t// Shoot the leader\n\tenv1.Release()\n\ttime.Sleep(3 * conf.HeartbeatTimeout)\n\n\t// Wait for a leader\n\t_, err = WaitForAny(Leader, envs)\n\tNoErr(err, t)\n\n\tallEnvs := append([]*RaftEnv{env1}, envs...)\n\tCheckConsistent(allEnvs, t)\n\n\tif len(env1.fsm.logs) != totalApplied {\n\t\tt.Fatalf(\"should apply %d logs! 
%d\", totalApplied, len(env1.fsm.logs))\n\t}\n\n\tfor _, e := range envs {\n\t\te.Release()\n\t}\n}\n\nfunc TestRaft_RestartFollower_LongInitialHeartbeat(t *testing.T) {\n\tCheckInteg(t)\n\ttests := []struct {\n\t\tname                   string\n\t\trestartInitialTimeouts time.Duration\n\t\texpectNewLeader        bool\n\t}{\n\t\t{\"Default\", 0, true},\n\t\t{\"InitialHigher\", time.Second, false},\n\t}\n\tfor _, tt := range tests {\n\t\tt.Run(tt.name, func(t *testing.T) {\n\t\t\tconf := DefaultConfig()\n\t\t\tconf.LocalID = ServerID(\"first\")\n\t\t\tconf.HeartbeatTimeout = 50 * time.Millisecond\n\t\t\tconf.ElectionTimeout = 50 * time.Millisecond\n\t\t\tconf.LeaderLeaseTimeout = 50 * time.Millisecond\n\t\t\tconf.CommitTimeout = 5 * time.Millisecond\n\t\t\tconf.SnapshotThreshold = 100\n\t\t\tconf.TrailingLogs = 10\n\n\t\t\t// Create a single node\n\t\t\tenv1 := MakeRaft(t, conf, true)\n\t\t\tNoErr(WaitFor(env1, Leader), t)\n\n\t\t\t// Join a few nodes!\n\t\t\tvar envs []*RaftEnv\n\t\t\tfor i := 0; i < 2; i++ {\n\t\t\t\tconf.LocalID = ServerID(fmt.Sprintf(\"next-batch-%d\", i))\n\t\t\t\tenv := MakeRaft(t, conf, false)\n\t\t\t\taddr := env.trans.LocalAddr()\n\t\t\t\tNoErr(WaitFuture(env1.raft.AddVoter(conf.LocalID, addr, 0, 0)), t)\n\t\t\t\tenvs = append(envs, env)\n\t\t\t}\n\t\t\tallEnvs := append([]*RaftEnv{env1}, envs...)\n\n\t\t\t// Wait for a leader\n\t\t\t_, err := WaitForAny(Leader, append([]*RaftEnv{env1}, envs...))\n\t\t\tNoErr(err, t)\n\n\t\t\tCheckConsistent(append([]*RaftEnv{env1}, envs...), t)\n\t\t\t// TODO without this sleep, the restarted follower doesn't have any stored config\n\t\t\t// and aborts the election because it doesn't know of any peers.  
Shouldn't\n\t\t\t// CheckConsistent prevent that?\n\t\t\ttime.Sleep(time.Second)\n\n\t\t\t// shutdown a follower\n\t\t\tdisconnected := envs[len(envs)-1]\n\t\t\tdisconnected.logger.Info(\"stopping follower\")\n\t\t\tdisconnected.Shutdown()\n\n\t\t\tseeNewLeader := func(o *Observation) bool { _, ok := o.Data.(LeaderObservation); return ok }\n\t\t\tleaderCh := make(chan Observation)\n\t\t\t// TODO Closing this channel results in panics, even though we're calling Release.\n\t\t\t// defer close(leaderCh)\n\t\t\tleaderChanges := new(uint32)\n\t\t\tgo func() {\n\t\t\t\tfor range leaderCh {\n\t\t\t\t\tatomic.AddUint32(leaderChanges, 1)\n\t\t\t\t}\n\t\t\t}()\n\n\t\t\trequestVoteCh := make(chan Observation)\n\t\t\tseeRequestVote := func(o *Observation) bool { _, ok := o.Data.(RequestVoteRequest); return ok }\n\t\t\trequestVotes := new(uint32)\n\t\t\tgo func() {\n\t\t\t\tfor range requestVoteCh {\n\t\t\t\t\tatomic.AddUint32(requestVotes, 1)\n\t\t\t\t}\n\t\t\t}()\n\n\t\t\tfor _, env := range allEnvs {\n\t\t\t\tenv.raft.RegisterObserver(NewObserver(leaderCh, false, seeNewLeader))\n\t\t\t}\n\n\t\t\t// Unfortunately we need to wait for the leader to start backing off RPCs to the down follower\n\t\t\t// such that when the follower comes back up it'll run an election before it gets an rpc from\n\t\t\t// the leader\n\t\t\ttime.Sleep(time.Second * 5)\n\n\t\t\tif tt.restartInitialTimeouts != 0 {\n\t\t\t\tdisconnected.conf.HeartbeatTimeout = tt.restartInitialTimeouts\n\t\t\t\tdisconnected.conf.ElectionTimeout = tt.restartInitialTimeouts\n\t\t\t}\n\t\t\tdisconnected.logger.Info(\"restarting follower\")\n\t\t\tdisconnected.Restart(t)\n\n\t\t\ttime.Sleep(time.Second * 2)\n\n\t\t\tif tt.expectNewLeader {\n\t\t\t\trequire.NotEqual(t, 0, atomic.LoadUint32(leaderChanges))\n\t\t\t} else {\n\t\t\t\trequire.Equal(t, uint32(0), atomic.LoadUint32(leaderChanges))\n\t\t\t}\n\n\t\t\tif tt.restartInitialTimeouts != 0 {\n\t\t\t\tfor _, env := range envs 
{\n\t\t\t\t\tenv.raft.RegisterObserver(NewObserver(requestVoteCh, false, seeRequestVote))\n\t\t\t\t\tNoErr(env.raft.ReloadConfig(ReloadableConfig{\n\t\t\t\t\t\tTrailingLogs:      conf.TrailingLogs,\n\t\t\t\t\t\tSnapshotInterval:  conf.SnapshotInterval,\n\t\t\t\t\t\tSnapshotThreshold: conf.SnapshotThreshold,\n\t\t\t\t\t\tHeartbeatTimeout:  250 * time.Millisecond,\n\t\t\t\t\t\tElectionTimeout:   250 * time.Millisecond,\n\t\t\t\t\t}), t)\n\t\t\t\t}\n\t\t\t\t// Make sure that reload by itself doesn't trigger a vote\n\t\t\t\ttime.Sleep(300 * time.Millisecond)\n\t\t\t\trequire.Equal(t, uint32(0), atomic.LoadUint32(requestVotes))\n\n\t\t\t\t// Stop the leader, ensure that we don't see a request vote within the first 50ms\n\t\t\t\t// (original config of the non-restarted follower), but that we do see one within\n\t\t\t\t// the 250ms both followers should now be using for heartbeat timeout.  Well, not\n\t\t\t\t// quite: we wait for two heartbeat intervals (plus a fudge factor), because the\n\t\t\t\t// first time around, last contact will have been recent enough that no vote will\n\t\t\t\t// be triggered.\n\t\t\t\tenv1.logger.Info(\"stopping leader\")\n\t\t\t\tenv1.Shutdown()\n\t\t\t\ttime.Sleep(50 * time.Millisecond)\n\t\t\t\trequire.Equal(t, uint32(0), atomic.LoadUint32(requestVotes))\n\t\t\t\ttime.Sleep(600 * time.Millisecond)\n\t\t\t\trequire.NotEqual(t, uint32(0), atomic.LoadUint32(requestVotes))\n\t\t\t}\n\n\t\t\tfor _, e := range allEnvs {\n\t\t\t\te.Release()\n\t\t\t}\n\t\t})\n\t}\n}\n\n// TestRaft_PreVote_LeaderSpam tests that when a leader spams the followers\n// with pre-vote requests they can still transition to candidate.\n// The reason this test needs to live here is that we need the transport heartbeat fast-path\n// to use as a trick to avoid heartbeats keeping the cluster stable.\n// That fast-path only exists in the net transport.\nfunc TestRaft_PreVote_LeaderSpam(t *testing.T) {\n\tCheckInteg(t)\n\tconf := DefaultConfig()\n\tconf.LocalID = 
ServerID(\"first\")\n\tconf.HeartbeatTimeout = 50 * time.Millisecond\n\tconf.ElectionTimeout = 50 * time.Millisecond\n\tconf.LeaderLeaseTimeout = 50 * time.Millisecond\n\tconf.CommitTimeout = 5 * time.Second\n\tconf.SnapshotThreshold = 100\n\tconf.TrailingLogs = 10\n\n\t// Create a single node\n\tleader := MakeRaft(t, conf, true)\n\tNoErr(WaitFor(leader, Leader), t)\n\n\t// Join a few nodes!\n\tvar followers []*RaftEnv\n\tfor i := 0; i < 2; i++ {\n\t\tconf.LocalID = ServerID(fmt.Sprintf(\"next-batch-%d\", i))\n\t\tenv := MakeRaft(t, conf, false)\n\t\taddr := env.trans.LocalAddr()\n\t\tNoErr(WaitFuture(leader.raft.AddVoter(conf.LocalID, addr, 0, 0)), t)\n\t\tfollowers = append(followers, env)\n\t}\n\n\t// Wait for a leader\n\t_, err := WaitForAny(Leader, append([]*RaftEnv{leader}, followers...))\n\tNoErr(err, t)\n\n\tCheckConsistent(append([]*RaftEnv{leader}, followers...), t)\n\n\tleaderT := leader.raft.trans\n\n\t// spam all the followers with pre-vote requests from the leader\n\t// those requests should be granted as long as the leader haven't changed.\n\tctx, cancel := context.WithCancel(context.Background())\n\tdefer cancel()\n\tgo func() {\n\t\tfor {\n\t\t\tticker := time.NewTicker(conf.HeartbeatTimeout / 2)\n\t\t\tfor _, f := range followers {\n\t\t\t\trsp := RequestPreVoteResponse{}\n\t\t\t\treqPreVote := RequestPreVoteRequest{\n\t\t\t\t\tRPCHeader:    leader.raft.getRPCHeader(),\n\t\t\t\t\tTerm:         leader.raft.getCurrentTerm() + 1,\n\t\t\t\t\tLastLogIndex: leader.raft.getLastIndex(),\n\t\t\t\t\tLastLogTerm:  leader.raft.getCurrentTerm(),\n\t\t\t\t}\n\t\t\t\t// We don't need to check the error here because when leader change\n\t\t\t\t// it will start failing with \"rejecting pre-vote request since we have a leader\"\n\t\t\t\t_ = leaderT.(WithPreVote).RequestPreVote(f.raft.localID, f.raft.localAddr, &reqPreVote, &rsp)\n\t\t\t}\n\t\t\tselect {\n\t\t\tcase <-ticker.C:\n\t\t\tcase 
<-ctx.Done():\n\t\t\t\treturn\n\t\t\t}\n\t\t}\n\t}()\n\ttime.Sleep(time.Second)\n\n\t// Make all followers ignore heartbeats from the current leader so that they can transition to the\n\t// candidate state. The purpose of this test is to verify that spamming nodes with pre-votes does not\n\t// prevent them from ever transitioning to candidates.\n\tfor _, f := range followers {\n\t\t// Copy f to avoid a data race.\n\t\tf1 := f\n\t\tf1.trans.SetHeartbeatHandler(func(rpc RPC) {\n\t\t\tif a, ok := rpc.Command.(*AppendEntriesRequest); ok {\n\t\t\t\tif ServerID(a.GetRPCHeader().ID) == leader.raft.localID {\n\t\t\t\t\tresp := &AppendEntriesResponse{\n\t\t\t\t\t\tRPCHeader:      f1.raft.getRPCHeader(),\n\t\t\t\t\t\tTerm:           f1.raft.getCurrentTerm(),\n\t\t\t\t\t\tLastLog:        f1.raft.getLastIndex(),\n\t\t\t\t\t\tSuccess:        false,\n\t\t\t\t\t\tNoRetryBackoff: false,\n\t\t\t\t\t}\n\t\t\t\t\trpc.Respond(resp, nil)\n\t\t\t\t} else {\n\t\t\t\t\tf.raft.processHeartbeat(rpc)\n\t\t\t\t}\n\t\t\t}\n\t\t})\n\t}\n\ttime.Sleep(1 * time.Second)\n\t// The new leader should be one of the former followers.\n\t_, err = WaitForAny(Leader, followers)\n\tNoErr(err, t)\n}\n"
  },
  {
    "path": "log.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"time\"\n\n\tmetrics \"github.com/hashicorp/go-metrics/compat\"\n)\n\n// LogType describes various types of log entries.\ntype LogType uint8\n\nconst (\n\t// LogCommand is applied to a user FSM.\n\tLogCommand LogType = iota\n\n\t// LogNoop is used to assert leadership.\n\tLogNoop\n\n\t// LogAddPeerDeprecated is used to add a new peer. This should only be used with\n\t// older protocol versions designed to be compatible with unversioned\n\t// Raft servers. See comments in config.go for details.\n\tLogAddPeerDeprecated\n\n\t// LogRemovePeerDeprecated is used to remove an existing peer. This should only be\n\t// used with older protocol versions designed to be compatible with\n\t// unversioned Raft servers. See comments in config.go for details.\n\tLogRemovePeerDeprecated\n\n\t// LogBarrier is used to ensure all preceding operations have been\n\t// applied to the FSM. It is similar to LogNoop, but instead of returning\n\t// once committed, it only returns once the FSM manager acks it. Otherwise,\n\t// it is possible there are operations committed but not yet applied to\n\t// the FSM.\n\tLogBarrier\n\n\t// LogConfiguration establishes a membership change configuration. It is\n\t// created when a server is added, removed, promoted, etc. 
Only used\n\t// when protocol version 1 or greater is in use.\n\tLogConfiguration\n)\n\n// String returns LogType as a human readable string.\nfunc (lt LogType) String() string {\n\tswitch lt {\n\tcase LogCommand:\n\t\treturn \"LogCommand\"\n\tcase LogNoop:\n\t\treturn \"LogNoop\"\n\tcase LogAddPeerDeprecated:\n\t\treturn \"LogAddPeerDeprecated\"\n\tcase LogRemovePeerDeprecated:\n\t\treturn \"LogRemovePeerDeprecated\"\n\tcase LogBarrier:\n\t\treturn \"LogBarrier\"\n\tcase LogConfiguration:\n\t\treturn \"LogConfiguration\"\n\tdefault:\n\t\treturn fmt.Sprintf(\"%d\", lt)\n\t}\n}\n\n// Log entries are replicated to all members of the Raft cluster\n// and form the heart of the replicated state machine.\ntype Log struct {\n\t// Index holds the index of the log entry.\n\tIndex uint64\n\n\t// Term holds the election term of the log entry.\n\tTerm uint64\n\n\t// Type holds the type of the log entry.\n\tType LogType\n\n\t// Data holds the log entry's type-specific data.\n\tData []byte\n\n\t// Extensions holds an opaque byte slice of information for middleware. It\n\t// is up to the client of the library to properly modify this as it adds\n\t// layers and removes those layers when appropriate. This value is a part of\n\t// the log, so very large values could cause timing issues.\n\t//\n\t// N.B. It is _up to the client_ to handle upgrade paths. For instance if\n\t// using this with go-raftchunking, the client should ensure that all Raft\n\t// peers are using a version that can handle that extension before ever\n\t// actually triggering chunking behavior. It is sometimes sufficient to\n\t// ensure that non-leaders are upgraded first, then the current leader is\n\t// upgraded, but a leader changeover during this process could lead to\n\t// trouble, so gating extension behavior via some flag in the client\n\t// program is also a good idea.\n\tExtensions []byte\n\n\t// AppendedAt stores the time the leader first appended this log to its\n\t// LogStore. 
Followers will observe the leader's time. It is not used for\n\t// coordination or as part of the replication protocol at all. It exists only\n\t// to provide operational information for example how many seconds worth of\n\t// logs are present on the leader which might impact follower's ability to\n\t// catch up after restoring a large snapshot. We should never rely on this\n\t// being in the past when appending on a follower or reading a log back since\n\t// the clock skew can mean a follower could see a log with a future timestamp.\n\t// In general too the leader is not required to persist the log before\n\t// delivering to followers although the current implementation happens to do\n\t// this.\n\tAppendedAt time.Time\n}\n\n// LogStore is used to provide an interface for storing\n// and retrieving logs in a durable fashion.\ntype LogStore interface {\n\t// FirstIndex returns the first index written. 0 for no entries.\n\tFirstIndex() (uint64, error)\n\n\t// LastIndex returns the last index written. 0 for no entries.\n\tLastIndex() (uint64, error)\n\n\t// GetLog gets a log entry at a given index.\n\tGetLog(index uint64, log *Log) error\n\n\t// StoreLog stores a log entry.\n\tStoreLog(log *Log) error\n\n\t// StoreLogs stores multiple log entries. By default the logs stored may not be contiguous with previous logs (i.e. may have a gap in Index since the last log written). If an implementation can't tolerate this it may optionally implement `MonotonicLogStore` to indicate that this is not allowed. This changes Raft's behaviour after restoring a user snapshot to remove all previous logs instead of relying on a \"gap\" to signal the discontinuity between logs before the snapshot and logs after.\n\tStoreLogs(logs []*Log) error\n\n\t// DeleteRange deletes a range of log entries. 
The range is inclusive.\n\tDeleteRange(min, max uint64) error\n}\n\n// MonotonicLogStore is an optional interface for LogStore implementations that\n// cannot tolerate gaps in between the Index values of consecutive log entries. For example,\n// this may allow more efficient indexing because the Index values are densely populated. If true is\n// returned, Raft will avoid relying on gaps to trigger re-synching logs on followers after a\n// snapshot is restored. The LogStore must have an efficient implementation of\n// DeleteRange for the case where all logs are removed, as this must be called after snapshot restore when gaps are not allowed.\n// We avoid deleting all records for LogStores that do not implement MonotonicLogStore\n// because although it's always correct to do so, it has a major negative performance impact on the BoltDB store that is currently\n// the most widely used.\ntype MonotonicLogStore interface {\n\tIsMonotonic() bool\n}\n\nfunc oldestLog(s LogStore) (Log, error) {\n\tvar l Log\n\n\t// We might get unlucky and have a truncate right between getting first log\n\t// index and fetching it so keep trying until we succeed or hard fail.\n\tvar lastFailIdx uint64\n\tvar lastErr error\n\tfor {\n\t\tfirstIdx, err := s.FirstIndex()\n\t\tif err != nil {\n\t\t\treturn l, err\n\t\t}\n\t\tif firstIdx == 0 {\n\t\t\treturn l, ErrLogNotFound\n\t\t}\n\t\tif firstIdx == lastFailIdx {\n\t\t\t// Got same index as last time around which errored, don't bother trying\n\t\t\t// to fetch it again just return the error.\n\t\t\treturn l, lastErr\n\t\t}\n\t\terr = s.GetLog(firstIdx, &l)\n\t\tif err == nil {\n\t\t\t// We found the oldest log, break the loop\n\t\t\tbreak\n\t\t}\n\t\t// We failed, keep trying to see if there is a new firstIndex\n\t\tlastFailIdx = firstIdx\n\t\tlastErr = err\n\t}\n\treturn l, nil\n}\n\nfunc emitLogStoreMetrics(s LogStore, prefix []string, interval time.Duration, stopCh <-chan struct{}) {\n\tfor {\n\t\tselect {\n\t\tcase 
<-time.After(interval):\n\t\t\t// In error case emit 0 as the age\n\t\t\tageMs := float32(0.0)\n\t\t\tl, err := oldestLog(s)\n\t\t\tif err == nil && !l.AppendedAt.IsZero() {\n\t\t\t\tageMs = float32(time.Since(l.AppendedAt).Milliseconds())\n\t\t\t}\n\t\t\tmetrics.SetGauge(append(prefix, \"oldestLogAge\"), ageMs)\n\t\tcase <-stopCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "log_cache.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"sync\"\n)\n\n// LogCache wraps any LogStore implementation to provide an\n// in-memory ring buffer. This is used to cache access to\n// the recently written entries. For implementations that do not\n// cache themselves, this can provide a substantial boost by\n// avoiding disk I/O on recent entries.\ntype LogCache struct {\n\tstore LogStore\n\n\tcache []*Log\n\tl     sync.RWMutex\n}\n\n// NewLogCache is used to create a new LogCache with the\n// given capacity and backend store.\nfunc NewLogCache(capacity int, store LogStore) (*LogCache, error) {\n\tif capacity <= 0 {\n\t\treturn nil, fmt.Errorf(\"capacity must be positive\")\n\t}\n\tc := &LogCache{\n\t\tstore: store,\n\t\tcache: make([]*Log, capacity),\n\t}\n\treturn c, nil\n}\n\n// IsMonotonic implements the MonotonicLogStore interface. This is a shim to\n// expose the underlying store as monotonically indexed or not.\nfunc (c *LogCache) IsMonotonic() bool {\n\tif store, ok := c.store.(MonotonicLogStore); ok {\n\t\treturn store.IsMonotonic()\n\t}\n\n\treturn false\n}\n\nfunc (c *LogCache) GetLog(idx uint64, log *Log) error {\n\t// Check the buffer for an entry\n\tc.l.RLock()\n\tcached := c.cache[idx%uint64(len(c.cache))]\n\tc.l.RUnlock()\n\n\t// Check if entry is valid\n\tif cached != nil && cached.Index == idx {\n\t\t*log = *cached\n\t\treturn nil\n\t}\n\n\t// Forward request on cache miss\n\treturn c.store.GetLog(idx, log)\n}\n\nfunc (c *LogCache) StoreLog(log *Log) error {\n\treturn c.StoreLogs([]*Log{log})\n}\n\nfunc (c *LogCache) StoreLogs(logs []*Log) error {\n\terr := c.store.StoreLogs(logs)\n\t// Insert the logs into the ring buffer, but only on success\n\tif err != nil {\n\t\treturn fmt.Errorf(\"unable to store logs within log store, err: %q\", err)\n\t}\n\tc.l.Lock()\n\tfor _, l := range logs {\n\t\tc.cache[l.Index%uint64(len(c.cache))] = l\n\t}\n\tc.l.Unlock()\n\treturn 
nil\n}\n\nfunc (c *LogCache) FirstIndex() (uint64, error) {\n\treturn c.store.FirstIndex()\n}\n\nfunc (c *LogCache) LastIndex() (uint64, error) {\n\treturn c.store.LastIndex()\n}\n\nfunc (c *LogCache) DeleteRange(min, max uint64) error {\n\t// Invalidate the cache on deletes\n\tc.l.Lock()\n\tc.cache = make([]*Log, len(c.cache))\n\tc.l.Unlock()\n\n\treturn c.store.DeleteRange(min, max)\n}\n"
  },
  {
    "path": "log_cache_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"errors\"\n\t\"strings\"\n\t\"sync\"\n\t\"testing\"\n)\n\nfunc TestLogCache(t *testing.T) {\n\tstore := NewInmemStore()\n\tc, _ := NewLogCache(16, store)\n\n\t// Insert into the in-mem store\n\tfor i := 0; i < 32; i++ {\n\t\tlog := &Log{Index: uint64(i) + 1}\n\t\t_ = store.StoreLog(log)\n\t}\n\n\t// Check the indexes\n\tif idx, _ := c.FirstIndex(); idx != 1 {\n\t\tt.Fatalf(\"bad: %d\", idx)\n\t}\n\tif idx, _ := c.LastIndex(); idx != 32 {\n\t\tt.Fatalf(\"bad: %d\", idx)\n\t}\n\n\t// Try get log with a miss\n\tvar out Log\n\terr := c.GetLog(1, &out)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif out.Index != 1 {\n\t\tt.Fatalf(\"bad: %#v\", out)\n\t}\n\n\t// Store logs\n\tl1 := &Log{Index: 33}\n\tl2 := &Log{Index: 34}\n\terr = c.StoreLogs([]*Log{l1, l2})\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\tif idx, _ := c.LastIndex(); idx != 34 {\n\t\tt.Fatalf(\"bad: %d\", idx)\n\t}\n\n\t// Check that it wrote-through\n\terr = store.GetLog(33, &out)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\terr = store.GetLog(34, &out)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Delete in the backend\n\terr = store.DeleteRange(33, 34)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Should be in the ring buffer\n\terr = c.GetLog(33, &out)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\terr = c.GetLog(34, &out)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Purge the ring buffer\n\terr = c.DeleteRange(33, 34)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Should not be in the ring buffer\n\terr = c.GetLog(33, &out)\n\tif err != ErrLogNotFound {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\terr = c.GetLog(34, &out)\n\tif err != ErrLogNotFound {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n}\n\ntype errorStore struct {\n\tLogStore\n\tmu      sync.Mutex\n\tfail    
bool\n\tfailed  int\n\tfailMax int\n}\n\nfunc (e *errorStore) StoreLogs(logs []*Log) error {\n\te.mu.Lock()\n\tdefer e.mu.Unlock()\n\tif e.fail {\n\t\te.failed++\n\t\tif e.failed <= e.failMax {\n\t\t\treturn errors.New(\"some error\")\n\t\t}\n\t\te.fail = false\n\t}\n\treturn e.LogStore.StoreLogs(logs)\n}\n\nfunc (e *errorStore) failNext(count int) {\n\te.mu.Lock()\n\te.fail = true\n\te.failMax = count\n\te.mu.Unlock()\n}\n\nfunc TestLogCacheWithBackendStoreError(t *testing.T) {\n\tvar err error\n\tstore := NewInmemStore()\n\terrStore := &errorStore{LogStore: store}\n\tc, _ := NewLogCache(16, errStore)\n\n\tfor i := 0; i < 4; i++ {\n\t\tlog := &Log{Index: uint64(i) + 1}\n\t\t_ = store.StoreLog(log)\n\t}\n\terrStore.failNext(1)\n\tlog := &Log{Index: 5}\n\terr = c.StoreLog(log)\n\tif !strings.Contains(err.Error(), \"some error\") {\n\t\tt.Fatalf(\"wanted: some error,  got err=%v\", err)\n\t}\n\n\tvar out Log\n\tfor i := 1; i < 5; i++ {\n\t\tif err := c.GetLog(uint64(i), &out); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t}\n\tout = Log{}\n\tif err = c.GetLog(5, &out); err != ErrLogNotFound {\n\t\tt.Fatalf(\"Should have returned not found, got err=%v out=%+v\", err, out)\n\t}\n}\n"
  },
  {
    "path": "log_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"fmt\"\n\t\"testing\"\n\t\"time\"\n\n\tmetrics \"github.com/hashicorp/go-metrics/compat\"\n)\n\nfunc TestOldestLog(t *testing.T) {\n\tcases := []struct {\n\t\tName    string\n\t\tLogs    []*Log\n\t\tWantIdx uint64\n\t\tWantErr bool\n\t}{\n\t\t{\n\t\t\tName:    \"empty logs\",\n\t\t\tLogs:    nil,\n\t\t\tWantIdx: 0,\n\t\t\tWantErr: true,\n\t\t},\n\t\t{\n\t\t\tName: \"simple case\",\n\t\t\tLogs: []*Log{\n\t\t\t\t{\n\t\t\t\t\tIndex: 1234,\n\t\t\t\t\tTerm:  1,\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\tIndex: 1235,\n\t\t\t\t\tTerm:  1,\n\t\t\t\t},\n\t\t\t\t{\n\t\t\t\t\tIndex: 1236,\n\t\t\t\t\tTerm:  2,\n\t\t\t\t},\n\t\t\t},\n\t\t\tWantIdx: 1234,\n\t\t\tWantErr: false,\n\t\t},\n\t}\n\n\tfor _, tc := range cases {\n\t\ttc := tc\n\t\tt.Run(tc.Name, func(t *testing.T) {\n\t\t\ts := NewInmemStore()\n\t\t\tif err := s.StoreLogs(tc.Logs); err != nil {\n\t\t\t\tt.Fatalf(\"expected store logs not to fail: %s\", err)\n\t\t\t}\n\n\t\t\tgot, err := oldestLog(s)\n\t\t\tswitch {\n\t\t\tcase tc.WantErr && err == nil:\n\t\t\t\tt.Fatalf(\"wanted error got nil\")\n\t\t\tcase !tc.WantErr && err != nil:\n\t\t\t\tt.Fatalf(\"wanted no error got: %s\", err)\n\t\t\t}\n\n\t\t\tif got.Index != tc.WantIdx {\n\t\t\t\tt.Fatalf(\"got index %v, want %v\", got.Index, tc.WantIdx)\n\t\t\t}\n\t\t})\n\t}\n}\n\nfunc TestEmitsLogStoreMetrics(t *testing.T) {\n\tsink := testSetupMetrics(t)\n\n\tstart := time.Now()\n\n\ts := NewInmemStore()\n\tlogs := []*Log{\n\t\t{\n\t\t\tIndex:      1234,\n\t\t\tTerm:       1,\n\t\t\tAppendedAt: time.Now(),\n\t\t},\n\t\t{\n\t\t\tIndex: 1235,\n\t\t\tTerm:  1,\n\t\t},\n\t\t{\n\t\t\tIndex: 1236,\n\t\t\tTerm:  2,\n\t\t},\n\t}\n\tif err := s.StoreLogs(logs); err != nil {\n\t\tt.Fatalf(\"expected store logs not to fail: %s\", err)\n\t}\n\n\tstopCh := make(chan struct{})\n\tdefer close(stopCh)\n\n\tgo emitLogStoreMetrics(s, []string{\"foo\"}, time.Millisecond, 
stopCh)\n\n\t// Wait for at least one interval\n\ttime.Sleep(5 * time.Millisecond)\n\n\tgot := getCurrentGaugeValue(t, sink, \"raft.test.foo.oldestLogAge\")\n\n\t// Assert the age is in a reasonable range.\n\tif got > float32(time.Since(start).Milliseconds()) {\n\t\tt.Fatalf(\"max age before test start: %v\", got)\n\t}\n\n\tif got < 1 {\n\t\tt.Fatalf(\"max age less than interval: %v\", got)\n\t}\n}\n\nfunc testSetupMetrics(t *testing.T) *metrics.InmemSink {\n\t// Record for ages (5 mins) so we can be confident that our assertions won't\n\t// fail on silly long test runs due to dropped data.\n\ts := metrics.NewInmemSink(10*time.Second, 300*time.Second)\n\tcfg := metrics.DefaultConfig(\"raft.test\")\n\tcfg.EnableHostname = false\n\t_, _ = metrics.NewGlobal(cfg, s)\n\treturn s\n}\n\nfunc getCurrentGaugeValue(t *testing.T, sink *metrics.InmemSink, name string) float32 {\n\tt.Helper()\n\n\tdata := sink.Data()\n\n\t// Loop backward through intervals until there is a non-empty one\n\t// Addresses flakiness around recording to one interval but accessing during the next\n\tfor i := len(data) - 1; i >= 0; i-- {\n\t\tcurrentInterval := data[i]\n\n\t\tcurrentInterval.RLock()\n\t\tif gv, ok := currentInterval.Gauges[name]; ok {\n\t\t\tcurrentInterval.RUnlock()\n\t\t\treturn gv.Value\n\t\t}\n\t\tcurrentInterval.RUnlock()\n\t}\n\n\t// Debug print all the gauges\n\tbuf := bytes.NewBuffer(nil)\n\tfor _, intv := range data {\n\t\tintv.RLock()\n\t\tfor name, val := range intv.Gauges {\n\t\t\tfmt.Fprintf(buf, \"[%v][G] '%s': %0.3f\\n\", intv.Interval, name, val.Value)\n\t\t}\n\t\tintv.RUnlock()\n\t}\n\tt.Log(buf.String())\n\n\tt.Fatalf(\"didn't find gauge %q\", name)\n\treturn 0\n}\n"
  },
  {
    "path": "membership.md",
    "content": "Simon (@superfell) and I (@ongardie) talked through reworking this library's cluster membership changes last Friday. We don't see a way to split this into independent patches, so we're taking the next best approach: submitting the plan here for review, then working on an enormous PR. Your feedback would be appreciated. (@superfell is out this week, however, so don't expect him to respond quickly.)\n\nThese are the main goals:\n - Bringing things in line with the description in my PhD dissertation;\n - Catching up new servers prior to granting them a vote, as well as allowing permanent non-voting members; and\n - Eliminating the `peers.json` file, to avoid issues of consistency between that and the log/snapshot.\n\n## Data-centric view\n\nWe propose to re-define a *configuration* as a set of servers, where each server includes an address (as it does today) and a mode that is either:\n - *Voter*: a server whose vote is counted in elections and whose match index is used in advancing the leader's commit index.\n - *Nonvoter*: a server that receives log entries but is not considered for elections or commitment purposes.\n - *Staging*: a server that acts like a nonvoter with one exception: once a staging server receives enough log entries to catch up sufficiently to the leader's log, the leader will invoke a  membership change to change the staging server to a voter.\n\nAll changes to the configuration will be done by writing a new configuration to the log. The new configuration will be in affect as soon as it is appended to the log (not when it is committed like a normal state machine command). Note that, per my dissertation, there can be at most one uncommitted configuration at a time (the next configuration may not be created until the prior one has been committed). 
It's not strictly necessary to follow these same rules for the nonvoter/staging servers, but we think its best to treat all changes uniformly.\n\nEach server will track two configurations:\n 1. its *committed configuration*: the latest configuration in the log/snapshot that has been committed, along with its index.\n 2. its *latest configuration*: the latest configuration in the log/snapshot (may be committed or uncommitted), along with its index.\n\nWhen there's no membership change happening, these two will be the same. The latest configuration is almost always the one used, except:\n - When followers truncate the suffix of their logs, they may need to fall back to the committed configuration.\n - When snapshotting, the committed configuration is written, to correspond with the committed log prefix that is being snapshotted.\n\n\n## Application API\n\nWe propose the following operations for clients to manipulate the cluster configuration:\n - AddVoter: server becomes staging unless voter,\n - AddNonvoter: server becomes nonvoter unless staging or voter,\n - DemoteVoter: server becomes nonvoter unless absent,\n - RemovePeer: server removed from configuration,\n - GetConfiguration: waits for latest config to commit, returns committed config.\n\nThis diagram, of which I'm quite proud, shows the possible transitions:\n```\n+-----------------------------------------------------------------------------+\n|                                                                             |\n|                      Start ->  +--------+                                   |\n|            ,------<------------|        |                                   |\n|           /                    | absent |                                   |\n|          /       RemovePeer--> |        | <---RemovePeer                    |\n|         /            |         +--------+               \\                   |\n|        /             |            |                      \\                  |\n|   
AddNonvoter        |         AddVoter                   \\                 |\n|       |       ,->---' `--<-.      |                        \\                |\n|       v      /              \\     v                         \\               |\n|  +----------+                +----------+                    +----------+   |\n|  |          | ---AddVoter--> |          | -log caught up --> |          |   |\n|  | nonvoter |                | staging  |                    |  voter   |   |\n|  |          | <-DemoteVoter- |          |                 ,- |          |   |\n|  +----------+         \\      +----------+                /   +----------+   |\n|                        \\                                /                   |\n|                         `--------------<---------------'                    |\n|                                                                             |\n+-----------------------------------------------------------------------------+\n```\n\nWhile these operations aren't quite symmetric, we think they're a good set to capture\nthe possible intent of the user. For example, if I want to make sure a server doesn't have a vote, but the server isn't part of the configuration at all, it probably shouldn't be added as a nonvoting server.\n\nEach of these application-level operations will be interpreted by the leader and, if it has an effect, will cause the leader to write a new configuration entry to its log. Which particular application-level operation caused the log entry to be written need not be part of the log entry.\n\n## Code implications\n\nThis is a non-exhaustive list, but we came up with a few things:\n- Remove the PeerStore: the `peers.json` file introduces the possibility of getting out of sync with the log and snapshot, and it's hard to maintain this atomically as the log changes. 
It's not clear whether it's meant to track the committed or latest configuration, either.\n- Servers will have to search their snapshot and log to find the committed configuration and the latest configuration on startup.\n- Bootstrap will no longer use `peers.json` but should initialize the log or snapshot with an application-provided configuration entry.\n- Snapshots should store the index of their configuration along with the configuration itself. In my experience with LogCabin, the original log index of the configuration is very useful to include in debug log messages.\n- As noted in hashicorp/raft#84, configuration change requests should come in via a separate channel, and one may not proceed until the last has been committed.\n- As to deciding when a log is sufficiently caught up, implementing a sophisticated algorithm *is* something that can be done in a separate PR. An easy and decent placeholder is: once the staging server has reached 95% of the leader's commit index, promote it.\n\n## Feedback\n\nAgain, we're looking for feedback here before we start working on this. Here are some questions to think about:\n - Does this seem like where we want things to go?\n - Is there anything here that should be left out?\n - Is there anything else we're forgetting about?\n - Is there a good way to break this up?\n - What do we need to worry about in terms of backwards compatibility?\n - What implication will this have on current tests?\n - What's the best way to test this code, in particular the small changes that will be sprinkled all over the library?\n"
  },
  {
    "path": "net_transport.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bufio\"\n\t\"context\"\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"net\"\n\t\"os\"\n\t\"sync\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\t\"github.com/hashicorp/go-metrics/compat\"\n\t\"github.com/hashicorp/go-msgpack/v2/codec\"\n)\n\nconst (\n\trpcAppendEntries uint8 = iota\n\trpcRequestVote\n\trpcInstallSnapshot\n\trpcTimeoutNow\n\trpcRequestPreVote\n\n\t// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.\n\tDefaultTimeoutScale = 256 * 1024 // 256KB\n\n\t// DefaultMaxRPCsInFlight is the default value used for pipelining configuration\n\t// if a zero value is passed. See https://github.com/hashicorp/raft/pull/541\n\t// for rationale. Note, if this is changed we should update the doc comments\n\t// below for NetworkTransportConfig.MaxRPCsInFlight.\n\tDefaultMaxRPCsInFlight = 2\n\n\t// connReceiveBufferSize is the size of the buffer we will use for reading RPC requests into\n\t// on followers\n\tconnReceiveBufferSize = 256 * 1024 // 256KB\n\n\t// connSendBufferSize is the size of the buffer we will use for sending RPC request data from\n\t// the leader to followers.\n\tconnSendBufferSize = 256 * 1024 // 256KB\n\n\t// minInFlightForPipelining is a property of our current pipelining\n\t// implementation and must not be changed unless we change the invariants of\n\t// that implementation. Roughly speaking even with a zero-length in-flight\n\t// buffer we still allow 2 requests to be in-flight before we block because we\n\t// only block after sending and the receiving go-routine always unblocks the\n\t// chan right after first send. 
This is a constant just to provide context\n\t// rather than a magic number in a few places we have to check invariants to\n\t// avoid panics etc.\n\tminInFlightForPipelining = 2\n)\n\nvar (\n\t// ErrTransportShutdown is returned when operations on a transport are\n\t// invoked after it's been terminated.\n\tErrTransportShutdown = errors.New(\"transport shutdown\")\n\n\t// ErrPipelineShutdown is returned when the pipeline is closed.\n\tErrPipelineShutdown = errors.New(\"append pipeline closed\")\n)\n\n// NetworkTransport provides a network based transport that can be\n// used to communicate with Raft on remote machines. It requires\n// an underlying stream layer to provide a stream abstraction, which can\n// be simple TCP, TLS, etc.\n//\n// This transport is very simple and lightweight. Each RPC request is\n// framed by sending a byte that indicates the message type, followed\n// by the MsgPack encoded request.\n//\n// The response is an error string followed by the response object,\n// both are encoded using MsgPack.\n//\n// InstallSnapshot is special, in that after the RPC request we stream\n// the entire state. 
That socket is not re-used as the connection state\n// is not known if there is an error.\ntype NetworkTransport struct {\n\tconnPool     map[ServerAddress][]*netConn\n\tconnPoolLock sync.Mutex\n\n\tconsumeCh chan RPC\n\n\theartbeatFn     func(RPC)\n\theartbeatFnLock sync.Mutex\n\n\tlogger hclog.Logger\n\n\tmaxPool     int\n\tmaxInFlight int\n\n\tserverAddressLock     sync.RWMutex\n\tserverAddressProvider ServerAddressProvider\n\n\tshutdown     bool\n\tshutdownCh   chan struct{}\n\tshutdownLock sync.Mutex\n\n\tstream StreamLayer\n\n\t// streamCtx is used to cancel existing connection handlers.\n\tstreamCtx     context.Context\n\tstreamCancel  context.CancelFunc\n\tstreamCtxLock sync.RWMutex\n\n\ttimeout      time.Duration\n\tTimeoutScale int\n\n\tmsgpackUseNewTimeFormat bool\n}\n\n// NetworkTransportConfig encapsulates configuration for the network transport layer.\ntype NetworkTransportConfig struct {\n\t// ServerAddressProvider is used to override the target address when establishing a connection to invoke an RPC\n\tServerAddressProvider ServerAddressProvider\n\n\tLogger hclog.Logger\n\n\t// Dialer\n\tStream StreamLayer\n\n\t// MaxPool controls how many connections we will pool\n\tMaxPool int\n\n\t// MaxRPCsInFlight controls the pipelining \"optimization\" when replicating\n\t// entries to followers.\n\t//\n\t// Setting this to 1 explicitly disables pipelining since no overlapping of\n\t// request processing is allowed. If set to 1 the pipelining code path is\n\t// skipped entirely and every request is entirely synchronous.\n\t//\n\t// If zero is set (or left as default), DefaultMaxRPCsInFlight is used which\n\t// is currently 2. 
A value of 2 overlaps the preparation and sending of the\n\t// next request while waiting for the previous response, but avoids additional\n\t// queuing.\n\t//\n\t// Historically this was internally fixed at (effectively) 130 however\n\t// performance testing has shown that in practice the pipelining optimization\n\t// combines badly with batching and actually has a very large negative impact\n\t// on commit latency when throughput is high, whilst having very little\n\t// benefit on latency or throughput in any other case! See\n\t// [#541](https://github.com/hashicorp/raft/pull/541) for more analysis of the\n\t// performance impacts.\n\t//\n\t// Increasing this beyond 2 is likely to be beneficial only in very\n\t// high-latency network conditions. HashiCorp doesn't recommend using our own\n\t// products this way.\n\t//\n\t// To maintain the behavior from before version 1.4.1 exactly, set this to\n\t// 130. The old internal constant was 128 but was used directly as a channel\n\t// buffer size. Since we send before blocking on the channel and unblock the\n\t// channel as soon as the receiver is done with the earliest outstanding\n\t// request, even an unbuffered channel (buffer=0) allows one request to be\n\t// sent while waiting for the previous one (i.e. 2 inflight). So the old\n\t// buffer actually allowed 130 RPCs to be inflight at once.\n\tMaxRPCsInFlight int\n\n\t// Timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply\n\t// the timeout by (SnapshotSize / TimeoutScale).\n\tTimeout time.Duration\n\n\t// MsgpackUseNewTimeFormat when set to true, forces the underlying msgpack\n\t// codec to use the new format of time.Time when encoding (used in\n\t// go-msgpack v1.1.5 by default). 
Decoding is not affected, as all\n\t// go-msgpack v2.1.0+ decoders know how to decode both formats.\n\tMsgpackUseNewTimeFormat bool\n}\n\n// ServerAddressProvider is a target address to which we invoke an RPC when establishing a connection\ntype ServerAddressProvider interface {\n\tServerAddr(id ServerID) (ServerAddress, error)\n}\n\n// StreamLayer is used with the NetworkTransport to provide\n// the low level stream abstraction.\ntype StreamLayer interface {\n\tnet.Listener\n\n\t// Dial is used to create a new outgoing connection\n\tDial(address ServerAddress, timeout time.Duration) (net.Conn, error)\n}\n\ntype netConn struct {\n\ttarget ServerAddress\n\tconn   net.Conn\n\tw      *bufio.Writer\n\tdec    *codec.Decoder\n\tenc    *codec.Encoder\n}\n\nfunc (n *netConn) Release() error {\n\treturn n.conn.Close()\n}\n\ntype netPipeline struct {\n\tconn  *netConn\n\ttrans *NetworkTransport\n\n\tdoneCh       chan AppendFuture\n\tinprogressCh chan *appendFuture\n\n\tshutdown     bool\n\tshutdownCh   chan struct{}\n\tshutdownLock sync.Mutex\n}\n\n// NewNetworkTransportWithConfig creates a new network transport with the given config struct\nfunc NewNetworkTransportWithConfig(\n\tconfig *NetworkTransportConfig,\n) *NetworkTransport {\n\tif config.Logger == nil {\n\t\tconfig.Logger = hclog.New(&hclog.LoggerOptions{\n\t\t\tName:   \"raft-net\",\n\t\t\tOutput: hclog.DefaultOutput,\n\t\t\tLevel:  hclog.DefaultLevel,\n\t\t})\n\t}\n\tmaxInFlight := config.MaxRPCsInFlight\n\tif maxInFlight == 0 {\n\t\t// Default zero value\n\t\tmaxInFlight = DefaultMaxRPCsInFlight\n\t}\n\ttrans := &NetworkTransport{\n\t\tconnPool:                make(map[ServerAddress][]*netConn),\n\t\tconsumeCh:               make(chan RPC),\n\t\tlogger:                  config.Logger,\n\t\tmaxPool:                 config.MaxPool,\n\t\tmaxInFlight:             maxInFlight,\n\t\tshutdownCh:              make(chan struct{}),\n\t\tstream:                  config.Stream,\n\t\ttimeout:                 
config.Timeout,\n\t\tTimeoutScale:            DefaultTimeoutScale,\n\t\tserverAddressProvider:   config.ServerAddressProvider,\n\t\tmsgpackUseNewTimeFormat: config.MsgpackUseNewTimeFormat,\n\t}\n\n\t// Create the connection context and then start our listener.\n\ttrans.setupStreamContext()\n\tgo trans.listen()\n\n\treturn trans\n}\n\n// NewNetworkTransport creates a new network transport with the given dialer\n// and listener. The maxPool controls how many connections we will pool. The\n// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply\n// the timeout by (SnapshotSize / TimeoutScale).\nfunc NewNetworkTransport(\n\tstream StreamLayer,\n\tmaxPool int,\n\ttimeout time.Duration,\n\tlogOutput io.Writer,\n) *NetworkTransport {\n\tif logOutput == nil {\n\t\tlogOutput = os.Stderr\n\t}\n\tlogger := hclog.New(&hclog.LoggerOptions{\n\t\tName:   \"raft-net\",\n\t\tOutput: logOutput,\n\t\tLevel:  hclog.DefaultLevel,\n\t})\n\tconfig := &NetworkTransportConfig{Stream: stream, MaxPool: maxPool, Timeout: timeout, Logger: logger}\n\treturn NewNetworkTransportWithConfig(config)\n}\n\n// NewNetworkTransportWithLogger creates a new network transport with the given logger, dialer\n// and listener. The maxPool controls how many connections we will pool. The\n// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply\n// the timeout by (SnapshotSize / TimeoutScale).\nfunc NewNetworkTransportWithLogger(\n\tstream StreamLayer,\n\tmaxPool int,\n\ttimeout time.Duration,\n\tlogger hclog.Logger,\n) *NetworkTransport {\n\tconfig := &NetworkTransportConfig{Stream: stream, MaxPool: maxPool, Timeout: timeout, Logger: logger}\n\treturn NewNetworkTransportWithConfig(config)\n}\n\n// setupStreamContext is used to create a new stream context. 
This should be\n// called with the stream lock held.\nfunc (n *NetworkTransport) setupStreamContext() {\n\tctx, cancel := context.WithCancel(context.Background())\n\tn.streamCtx = ctx\n\tn.streamCancel = cancel\n}\n\n// getStreamContext is used to retrieve the current stream context.\nfunc (n *NetworkTransport) getStreamContext() context.Context {\n\tn.streamCtxLock.RLock()\n\tdefer n.streamCtxLock.RUnlock()\n\treturn n.streamCtx\n}\n\n// SetHeartbeatHandler is used to set up a heartbeat handler\n// as a fast-path. This is to avoid head-of-line blocking from\n// disk IO.\nfunc (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {\n\tn.heartbeatFnLock.Lock()\n\tdefer n.heartbeatFnLock.Unlock()\n\tn.heartbeatFn = cb\n}\n\n// CloseStreams closes the current streams.\nfunc (n *NetworkTransport) CloseStreams() {\n\tn.connPoolLock.Lock()\n\tdefer n.connPoolLock.Unlock()\n\n\t// Close all the connections in the connection pool and then remove their\n\t// entry.\n\tfor k, e := range n.connPool {\n\t\tfor _, conn := range e {\n\t\t\t_ = conn.Release()\n\t\t}\n\n\t\tdelete(n.connPool, k)\n\t}\n\n\t// Cancel the existing connections and create a new context. 
Both these\n\t// operations must always be done with the lock held otherwise we can create\n\t// connection handlers that are holding a context that will never be\n\t// cancelable.\n\tn.streamCtxLock.Lock()\n\tn.streamCancel()\n\tn.setupStreamContext()\n\tn.streamCtxLock.Unlock()\n}\n\n// Close is used to stop the network transport.\nfunc (n *NetworkTransport) Close() error {\n\tn.shutdownLock.Lock()\n\tdefer n.shutdownLock.Unlock()\n\n\tif !n.shutdown {\n\t\tclose(n.shutdownCh)\n\t\t_ = n.stream.Close()\n\t\tn.shutdown = true\n\t}\n\treturn nil\n}\n\n// Consumer implements the Transport interface.\nfunc (n *NetworkTransport) Consumer() <-chan RPC {\n\treturn n.consumeCh\n}\n\n// LocalAddr implements the Transport interface.\nfunc (n *NetworkTransport) LocalAddr() ServerAddress {\n\treturn ServerAddress(n.stream.Addr().String())\n}\n\n// IsShutdown is used to check if the transport is shutdown.\nfunc (n *NetworkTransport) IsShutdown() bool {\n\tselect {\n\tcase <-n.shutdownCh:\n\t\treturn true\n\tdefault:\n\t\treturn false\n\t}\n}\n\n// getPooledConn is used to grab a pooled connection.\nfunc (n *NetworkTransport) getPooledConn(target ServerAddress) *netConn {\n\tn.connPoolLock.Lock()\n\tdefer n.connPoolLock.Unlock()\n\n\tconns, ok := n.connPool[target]\n\tif !ok || len(conns) == 0 {\n\t\treturn nil\n\t}\n\n\tvar conn *netConn\n\tnum := len(conns)\n\tconn, conns[num-1] = conns[num-1], nil\n\tn.connPool[target] = conns[:num-1]\n\treturn conn\n}\n\n// getConnFromAddressProvider returns a connection from the server address provider if available, or defaults to a connection using the target server address\nfunc (n *NetworkTransport) getConnFromAddressProvider(id ServerID, target ServerAddress) (*netConn, error) {\n\taddress := n.getProviderAddressOrFallback(id, target)\n\treturn n.getConn(address)\n}\n\nfunc (n *NetworkTransport) getProviderAddressOrFallback(id ServerID, target ServerAddress) ServerAddress {\n\tn.serverAddressLock.RLock()\n\tdefer 
n.serverAddressLock.RUnlock()\n\tif n.serverAddressProvider != nil {\n\t\tserverAddressOverride, err := n.serverAddressProvider.ServerAddr(id)\n\t\tif err != nil {\n\t\t\tn.logger.Warn(\"unable to get address for server, using fallback address\", \"id\", id, \"fallback\", target, \"error\", err)\n\t\t} else {\n\t\t\treturn serverAddressOverride\n\t\t}\n\t}\n\treturn target\n}\n\n// getConn is used to get a connection from the pool.\nfunc (n *NetworkTransport) getConn(target ServerAddress) (*netConn, error) {\n\t// Check for a pooled conn\n\tif conn := n.getPooledConn(target); conn != nil {\n\t\treturn conn, nil\n\t}\n\n\t// Dial a new connection\n\tconn, err := n.stream.Dial(target, n.timeout)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\t// Wrap the conn\n\tnetConn := &netConn{\n\t\ttarget: target,\n\t\tconn:   conn,\n\t\tdec:    codec.NewDecoder(bufio.NewReader(conn), &codec.MsgpackHandle{}),\n\t\tw:      bufio.NewWriterSize(conn, connSendBufferSize),\n\t}\n\n\tmp := &codec.MsgpackHandle{}\n\tmp.TimeNotBuiltin = !n.msgpackUseNewTimeFormat\n\tnetConn.enc = codec.NewEncoder(netConn.w, mp)\n\n\t// Done\n\treturn netConn, nil\n}\n\n// returnConn returns a connection back to the pool.\nfunc (n *NetworkTransport) returnConn(conn *netConn) {\n\tn.connPoolLock.Lock()\n\tdefer n.connPoolLock.Unlock()\n\n\tkey := conn.target\n\tconns := n.connPool[key]\n\n\tif !n.IsShutdown() && len(conns) < n.maxPool {\n\t\tn.connPool[key] = append(conns, conn)\n\t} else {\n\t\t_ = conn.Release()\n\t}\n}\n\n// AppendEntriesPipeline returns an interface that can be used to pipeline\n// AppendEntries requests.\nfunc (n *NetworkTransport) AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) {\n\tif n.maxInFlight < minInFlightForPipelining {\n\t\t// Pipelining is disabled since no more than one request can be outstanding\n\t\t// at once. 
Skip the whole code path and use synchronous requests.\n\t\treturn nil, ErrPipelineReplicationNotSupported\n\t}\n\n\t// Get a connection\n\tconn, err := n.getConnFromAddressProvider(id, target)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\t// Create the pipeline\n\treturn newNetPipeline(n, conn, n.maxInFlight), nil\n}\n\n// AppendEntries implements the Transport interface.\nfunc (n *NetworkTransport) AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {\n\treturn n.genericRPC(id, target, rpcAppendEntries, args, resp)\n}\n\n// RequestVote implements the Transport interface.\nfunc (n *NetworkTransport) RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error {\n\treturn n.genericRPC(id, target, rpcRequestVote, args, resp)\n}\n\n// RequestPreVote implements the Transport interface.\nfunc (n *NetworkTransport) RequestPreVote(id ServerID, target ServerAddress, args *RequestPreVoteRequest, resp *RequestPreVoteResponse) error {\n\treturn n.genericRPC(id, target, rpcRequestPreVote, args, resp)\n}\n\n// genericRPC handles a simple request/response RPC.\nfunc (n *NetworkTransport) genericRPC(id ServerID, target ServerAddress, rpcType uint8, args interface{}, resp interface{}) error {\n\t// Get a conn\n\tconn, err := n.getConnFromAddressProvider(id, target)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// Set a deadline\n\tif n.timeout > 0 {\n\t\t_ = conn.conn.SetDeadline(time.Now().Add(n.timeout))\n\t}\n\n\t// Send the RPC\n\tif err = sendRPC(conn, rpcType, args); err != nil {\n\t\treturn err\n\t}\n\n\t// Decode the response\n\tcanReturn, err := decodeResponse(conn, resp)\n\tif canReturn {\n\t\tn.returnConn(conn)\n\t}\n\treturn err\n}\n\n// InstallSnapshot implements the Transport interface.\nfunc (n *NetworkTransport) InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {\n\t// Get a 
conn, always close for InstallSnapshot\n\tconn, err := n.getConnFromAddressProvider(id, target)\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer func() { _ = conn.Release() }()\n\n\t// Set a deadline, scaled by request size\n\tif n.timeout > 0 {\n\t\ttimeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))\n\t\tif timeout < n.timeout {\n\t\t\ttimeout = n.timeout\n\t\t}\n\t\t_ = conn.conn.SetDeadline(time.Now().Add(timeout))\n\t}\n\n\t// Send the RPC\n\tif err = sendRPC(conn, rpcInstallSnapshot, args); err != nil {\n\t\treturn err\n\t}\n\n\t// Stream the state\n\tif _, err = io.Copy(conn.w, data); err != nil {\n\t\treturn err\n\t}\n\n\t// Flush\n\tif err = conn.w.Flush(); err != nil {\n\t\treturn err\n\t}\n\n\t// Decode the response, do not return conn\n\t_, err = decodeResponse(conn, resp)\n\treturn err\n}\n\n// EncodePeer implements the Transport interface.\nfunc (n *NetworkTransport) EncodePeer(id ServerID, p ServerAddress) []byte {\n\taddress := n.getProviderAddressOrFallback(id, p)\n\treturn []byte(address)\n}\n\n// DecodePeer implements the Transport interface.\nfunc (n *NetworkTransport) DecodePeer(buf []byte) ServerAddress {\n\treturn ServerAddress(buf)\n}\n\n// TimeoutNow implements the Transport interface.\nfunc (n *NetworkTransport) TimeoutNow(id ServerID, target ServerAddress, args *TimeoutNowRequest, resp *TimeoutNowResponse) error {\n\treturn n.genericRPC(id, target, rpcTimeoutNow, args, resp)\n}\n\n// listen is used to handle incoming connections.\nfunc (n *NetworkTransport) listen() {\n\tconst baseDelay = 5 * time.Millisecond\n\tconst maxDelay = 1 * time.Second\n\n\tvar loopDelay time.Duration\n\tfor {\n\t\t// Accept incoming connections\n\t\tconn, err := n.stream.Accept()\n\t\tif err != nil {\n\t\t\tif loopDelay == 0 {\n\t\t\t\tloopDelay = baseDelay\n\t\t\t} else {\n\t\t\t\tloopDelay *= 2\n\t\t\t}\n\n\t\t\tif loopDelay > maxDelay {\n\t\t\t\tloopDelay = maxDelay\n\t\t\t}\n\n\t\t\tif !n.IsShutdown() {\n\t\t\t\tn.logger.Error(\"failed to 
accept connection\", \"error\", err)\n\t\t\t}\n\n\t\t\tselect {\n\t\t\tcase <-n.shutdownCh:\n\t\t\t\treturn\n\t\t\tcase <-time.After(loopDelay):\n\t\t\t\tcontinue\n\t\t\t}\n\t\t}\n\t\t// No error, reset loop delay\n\t\tloopDelay = 0\n\n\t\tn.logger.Debug(\"accepted connection\", \"local-address\", n.LocalAddr(), \"remote-address\", conn.RemoteAddr().String())\n\n\t\t// Handle the connection in dedicated routine\n\t\tgo n.handleConn(n.getStreamContext(), conn)\n\t}\n}\n\n// handleConn is used to handle an inbound connection for its lifespan. The\n// handler will exit when the passed context is cancelled or the connection is\n// closed.\nfunc (n *NetworkTransport) handleConn(connCtx context.Context, conn net.Conn) {\n\tdefer func() { _ = conn.Close() }()\n\tr := bufio.NewReaderSize(conn, connReceiveBufferSize)\n\tw := bufio.NewWriter(conn)\n\tdec := codec.NewDecoder(r, &codec.MsgpackHandle{})\n\n\tmp := &codec.MsgpackHandle{}\n\tmp.TimeNotBuiltin = !n.msgpackUseNewTimeFormat\n\tenc := codec.NewEncoder(w, mp)\n\n\tfor {\n\t\tselect {\n\t\tcase <-connCtx.Done():\n\t\t\tn.logger.Debug(\"stream layer is closed\")\n\t\t\treturn\n\t\tdefault:\n\t\t}\n\n\t\tif err := n.handleCommand(r, dec, enc); err != nil {\n\t\t\tif err != io.EOF {\n\t\t\t\tn.logger.Error(\"failed to decode incoming command\", \"error\", err)\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t\tif err := w.Flush(); err != nil {\n\t\t\tn.logger.Error(\"failed to flush response\", \"error\", err)\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// handleCommand is used to decode and dispatch a single command.\nfunc (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {\n\tgetTypeStart := time.Now()\n\n\t// Get the rpc type\n\trpcType, err := r.ReadByte()\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// measuring the time to get the first byte separately because the heartbeat conn will hang out here\n\t// for a good while waiting for a heartbeat whereas the append entries/rpc conn should 
not.\n\tmetrics.MeasureSince([]string{\"raft\", \"net\", \"getRPCType\"}, getTypeStart)\n\tdecodeStart := time.Now()\n\n\t// Create the RPC object\n\trespCh := make(chan RPCResponse, 1)\n\trpc := RPC{\n\t\tRespChan: respCh,\n\t}\n\n\t// Decode the command\n\tisHeartbeat := false\n\tvar labels []metrics.Label\n\tswitch rpcType {\n\tcase rpcAppendEntries:\n\t\tvar req AppendEntriesRequest\n\t\tif err := dec.Decode(&req); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &req\n\n\t\tleaderAddr := req.Addr\n\t\tif len(leaderAddr) == 0 {\n\t\t\tleaderAddr = req.Leader\n\t\t}\n\n\t\t// Check if this is a heartbeat\n\t\tif req.Term != 0 && leaderAddr != nil &&\n\t\t\treq.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&\n\t\t\tlen(req.Entries) == 0 && req.LeaderCommitIndex == 0 {\n\t\t\tisHeartbeat = true\n\t\t}\n\n\t\tif isHeartbeat {\n\t\t\tlabels = []metrics.Label{{Name: \"rpcType\", Value: \"Heartbeat\"}}\n\t\t} else {\n\t\t\tlabels = []metrics.Label{{Name: \"rpcType\", Value: \"AppendEntries\"}}\n\t\t}\n\tcase rpcRequestVote:\n\t\tvar req RequestVoteRequest\n\t\tif err := dec.Decode(&req); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &req\n\t\tlabels = []metrics.Label{{Name: \"rpcType\", Value: \"RequestVote\"}}\n\tcase rpcRequestPreVote:\n\t\tvar req RequestPreVoteRequest\n\t\tif err := dec.Decode(&req); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &req\n\t\tlabels = []metrics.Label{{Name: \"rpcType\", Value: \"RequestPreVote\"}}\n\tcase rpcInstallSnapshot:\n\t\tvar req InstallSnapshotRequest\n\t\tif err := dec.Decode(&req); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &req\n\t\trpc.Reader = io.LimitReader(r, req.Size)\n\t\tlabels = []metrics.Label{{Name: \"rpcType\", Value: \"InstallSnapshot\"}}\n\tcase rpcTimeoutNow:\n\t\tvar req TimeoutNowRequest\n\t\tif err := dec.Decode(&req); err != nil {\n\t\t\treturn err\n\t\t}\n\t\trpc.Command = &req\n\t\tlabels = []metrics.Label{{Name: \"rpcType\", Value: 
\"TimeoutNow\"}}\n\tdefault:\n\t\treturn fmt.Errorf(\"unknown rpc type %d\", rpcType)\n\t}\n\n\tmetrics.MeasureSinceWithLabels([]string{\"raft\", \"net\", \"rpcDecode\"}, decodeStart, labels)\n\n\tprocessStart := time.Now()\n\n\t// Check for heartbeat fast-path\n\tif isHeartbeat {\n\t\tn.heartbeatFnLock.Lock()\n\t\tfn := n.heartbeatFn\n\t\tn.heartbeatFnLock.Unlock()\n\t\tif fn != nil {\n\t\t\tfn(rpc)\n\t\t\tgoto RESP\n\t\t}\n\t}\n\n\t// Dispatch the RPC\n\tselect {\n\tcase n.consumeCh <- rpc:\n\tcase <-n.shutdownCh:\n\t\treturn ErrTransportShutdown\n\t}\n\n\t// Wait for response\nRESP:\n\t// we will differentiate the heartbeat fast path from normal RPCs with labels\n\tmetrics.MeasureSinceWithLabels([]string{\"raft\", \"net\", \"rpcEnqueue\"}, processStart, labels)\n\trespWaitStart := time.Now()\n\tselect {\n\tcase resp := <-respCh:\n\t\tdefer metrics.MeasureSinceWithLabels([]string{\"raft\", \"net\", \"rpcRespond\"}, respWaitStart, labels)\n\t\t// Send the error first\n\t\trespErr := \"\"\n\t\tif resp.Error != nil {\n\t\t\trespErr = resp.Error.Error()\n\t\t}\n\t\tif err := enc.Encode(respErr); err != nil {\n\t\t\treturn err\n\t\t}\n\n\t\t// Send the response\n\t\tif err := enc.Encode(resp.Response); err != nil {\n\t\t\treturn err\n\t\t}\n\tcase <-n.shutdownCh:\n\t\treturn ErrTransportShutdown\n\t}\n\treturn nil\n}\n\n// decodeResponse is used to decode an RPC response and reports whether\n// the connection can be reused.\nfunc decodeResponse(conn *netConn, resp interface{}) (bool, error) {\n\t// Decode the error if any\n\tvar rpcError string\n\tif err := conn.dec.Decode(&rpcError); err != nil {\n\t\t_ = conn.Release()\n\t\treturn false, err\n\t}\n\n\t// Decode the response\n\tif err := conn.dec.Decode(resp); err != nil {\n\t\t_ = conn.Release()\n\t\treturn false, err\n\t}\n\n\t// Format an error if any\n\tif rpcError != \"\" {\n\t\treturn true, errors.New(rpcError)\n\t}\n\treturn true, nil\n}\n\n// sendRPC is used to encode and send the RPC.\nfunc sendRPC(conn 
*netConn, rpcType uint8, args interface{}) error {\n\t// Write the request type\n\tif err := conn.w.WriteByte(rpcType); err != nil {\n\t\t_ = conn.Release()\n\t\treturn err\n\t}\n\n\t// Send the request\n\tif err := conn.enc.Encode(args); err != nil {\n\t\t_ = conn.Release()\n\t\treturn err\n\t}\n\n\t// Flush\n\tif err := conn.w.Flush(); err != nil {\n\t\t_ = conn.Release()\n\t\treturn err\n\t}\n\treturn nil\n}\n\n// newNetPipeline is used to construct a netPipeline from a given transport and\n// connection. It is a bug to ever call this with maxInFlight less than 2\n// (minInFlightForPipelining) and will cause a panic.\nfunc newNetPipeline(trans *NetworkTransport, conn *netConn, maxInFlight int) *netPipeline {\n\tif maxInFlight < minInFlightForPipelining {\n\t\t// Shouldn't happen (tm) since we validate this in the one call site and\n\t\t// skip pipelining if it's lower.\n\t\tpanic(\"pipelining makes no sense if maxInFlight < 2\")\n\t}\n\tn := &netPipeline{\n\t\tconn:  conn,\n\t\ttrans: trans,\n\t\t// The buffer size is 2 less than the configured max because we send before\n\t\t// waiting on the channel and the decode routine unblocks the channel as\n\t\t// soon as it's waiting on the first request. So a zero-buffered channel\n\t\t// still allows 1 request to be sent even while decode is still waiting for\n\t\t// a response from the previous one. i.e. 
two are inflight at the same time.\n\t\tinprogressCh: make(chan *appendFuture, maxInFlight-2),\n\t\tdoneCh:       make(chan AppendFuture, maxInFlight-2),\n\t\tshutdownCh:   make(chan struct{}),\n\t}\n\tgo n.decodeResponses()\n\treturn n\n}\n\n// decodeResponses is a long running routine that decodes the responses\n// sent on the connection.\nfunc (n *netPipeline) decodeResponses() {\n\ttimeout := n.trans.timeout\n\tfor {\n\t\tselect {\n\t\tcase future := <-n.inprogressCh:\n\t\t\tif timeout > 0 {\n\t\t\t\t_ = n.conn.conn.SetReadDeadline(time.Now().Add(timeout))\n\t\t\t}\n\n\t\t\t_, err := decodeResponse(n.conn, future.resp)\n\t\t\tfuture.respond(err)\n\t\t\tselect {\n\t\t\tcase n.doneCh <- future:\n\t\t\tcase <-n.shutdownCh:\n\t\t\t\treturn\n\t\t\t}\n\t\tcase <-n.shutdownCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// AppendEntries is used to pipeline a new append entries request.\nfunc (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {\n\t// Create a new future\n\tfuture := &appendFuture{\n\t\tstart: time.Now(),\n\t\targs:  args,\n\t\tresp:  resp,\n\t}\n\tfuture.init()\n\n\t// Add a send timeout\n\tif timeout := n.trans.timeout; timeout > 0 {\n\t\t_ = n.conn.conn.SetWriteDeadline(time.Now().Add(timeout))\n\t}\n\n\t// Send the RPC\n\tif err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil {\n\t\treturn nil, err\n\t}\n\n\t// Hand-off for decoding, this can also cause back-pressure\n\t// to prevent too many inflight requests\n\tselect {\n\tcase n.inprogressCh <- future:\n\t\treturn future, nil\n\tcase <-n.shutdownCh:\n\t\treturn nil, ErrPipelineShutdown\n\t}\n}\n\n// Consumer returns a channel that can be used to consume complete futures.\nfunc (n *netPipeline) Consumer() <-chan AppendFuture {\n\treturn n.doneCh\n}\n\n// Close is used to shut down the pipeline connection.\nfunc (n *netPipeline) Close() error {\n\tn.shutdownLock.Lock()\n\tdefer n.shutdownLock.Unlock()\n\tif n.shutdown {\n\t\treturn 
nil\n\t}\n\n\t// Release the connection\n\t_ = n.conn.Release()\n\n\tn.shutdown = true\n\tclose(n.shutdownCh)\n\treturn nil\n}\n"
  },
  {
    "path": "net_transport_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"fmt\"\n\t\"net\"\n\t\"reflect\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\t\"github.com/stretchr/testify/require\"\n)\n\ntype testAddrProvider struct {\n\taddr string\n}\n\nfunc (t *testAddrProvider) ServerAddr(id ServerID) (ServerAddress, error) {\n\treturn ServerAddress(t.addr), nil\n}\n\nfunc TestNetworkTransport_CloseStreams(t *testing.T) {\n\t// Transport 1 is consumer\n\ttrans1, err := NewTCPTransportWithLogger(\"localhost:0\", nil, 2, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans1.Close() }()\n\trpcCh := trans1.Consumer()\n\n\t// Make the RPC request\n\targs := AppendEntriesRequest{\n\t\tTerm:         10,\n\t\tPrevLogEntry: 100,\n\t\tPrevLogTerm:  4,\n\t\tEntries: []*Log{\n\t\t\t{\n\t\t\t\tIndex: 101,\n\t\t\t\tTerm:  4,\n\t\t\t\tType:  LogNoop,\n\t\t\t},\n\t\t},\n\t\tLeaderCommitIndex: 90,\n\t\tRPCHeader:         RPCHeader{Addr: []byte(\"cartman\")},\n\t}\n\n\tresp := AppendEntriesResponse{\n\t\tTerm:    4,\n\t\tLastLog: 90,\n\t\tSuccess: true,\n\t}\n\n\t// errCh is used to report errors from any of the goroutines\n\t// created in this test.\n\t// It is buffered as to not block.\n\terrCh := make(chan error, 100)\n\n\t// Listen for a request\n\tgo func() {\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\terrCh <- fmt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\treturn\n\t\t\t}\n\t\t}\n\t}()\n\n\t// Transport 2 makes outbound request, 3 conn pool\n\ttrans2, err := NewTCPTransportWithLogger(\"localhost:0\", 
nil, 3, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans2.Close() }()\n\n\tfor i := 0; i < 2; i++ {\n\t\t// Create wait group\n\t\twg := &sync.WaitGroup{}\n\n\t\t// Try to do parallel appends, should stress the conn pool\n\t\tfor i = 0; i < 5; i++ {\n\t\t\twg.Add(1)\n\t\t\tgo func() {\n\t\t\t\tdefer wg.Done()\n\t\t\t\tvar out AppendEntriesResponse\n\t\t\t\tif err := trans2.AppendEntries(\"id1\", trans1.LocalAddr(), &args, &out); err != nil {\n\t\t\t\t\terrCh <- err\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\t// Verify the response\n\t\t\t\tif !reflect.DeepEqual(resp, out) {\n\t\t\t\t\terrCh <- fmt.Errorf(\"command mismatch: %#v %#v\", resp, out)\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t}()\n\t\t}\n\n\t\t// Wait for the routines to finish\n\t\twg.Wait()\n\n\t\t// Check if we received any errors from the above goroutines.\n\t\tif len(errCh) > 0 {\n\t\t\tt.Fatal(<-errCh)\n\t\t}\n\n\t\t// Check the conn pool size\n\t\taddr := trans1.LocalAddr()\n\t\tif len(trans2.connPool[addr]) != 3 {\n\t\t\tt.Fatalf(\"Expected 3 pooled conns!\")\n\t\t}\n\n\t\tif i == 0 {\n\t\t\ttrans2.CloseStreams()\n\t\t\tif len(trans2.connPool[addr]) != 0 {\n\t\t\t\tt.Fatalf(\"Expected no pooled conns after closing streams!\")\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunc TestNetworkTransport_StartStop(t *testing.T) {\n\ttrans, err := NewTCPTransportWithLogger(\"localhost:0\", nil, 2, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\t_ = trans.Close()\n}\n\nfunc TestNetworkTransport_Heartbeat_FastPath(t *testing.T) {\n\t// Transport 1 is consumer\n\ttrans1, err := NewTCPTransportWithLogger(\"localhost:0\", nil, 2, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans1.Close() }()\n\n\t// Make the RPC request\n\targs := AppendEntriesRequest{\n\t\tTerm:      10,\n\t\tRPCHeader: RPCHeader{ProtocolVersion: ProtocolVersionMax, Addr: 
[]byte(\"cartman\")},\n\t\tLeader:    []byte(\"cartman\"),\n\t}\n\n\tresp := AppendEntriesResponse{\n\t\tTerm:    4,\n\t\tLastLog: 90,\n\t\tSuccess: true,\n\t}\n\n\tinvoked := false\n\tfastpath := func(rpc RPC) {\n\t\t// Verify the command\n\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", *req, args)\n\t\t}\n\n\t\trpc.Respond(&resp, nil)\n\t\tinvoked = true\n\t}\n\ttrans1.SetHeartbeatHandler(fastpath)\n\n\t// Transport 2 makes outbound request\n\ttrans2, err := NewTCPTransportWithLogger(\"localhost:0\", nil, 2, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans2.Close() }()\n\n\tvar out AppendEntriesResponse\n\tif err := trans2.AppendEntries(\"id1\", trans1.LocalAddr(), &args, &out); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Verify the response\n\tif !reflect.DeepEqual(resp, out) {\n\t\tt.Fatalf(\"command mismatch: %#v %#v\", resp, out)\n\t}\n\n\t// Ensure fast-path is used\n\tif !invoked {\n\t\tt.Fatalf(\"fast-path not used\")\n\t}\n}\n\nfunc makeAppendRPC() AppendEntriesRequest {\n\treturn AppendEntriesRequest{\n\t\tTerm:         10,\n\t\tPrevLogEntry: 100,\n\t\tPrevLogTerm:  4,\n\t\tEntries: []*Log{\n\t\t\t{\n\t\t\t\tIndex: 101,\n\t\t\t\tTerm:  4,\n\t\t\t\tType:  LogNoop,\n\t\t\t},\n\t\t},\n\t\tLeaderCommitIndex: 90,\n\t\tRPCHeader:         RPCHeader{Addr: []byte(\"cartman\")},\n\t}\n}\n\nfunc makeAppendRPCResponse() AppendEntriesResponse {\n\treturn AppendEntriesResponse{\n\t\tTerm:    4,\n\t\tLastLog: 90,\n\t\tSuccess: true,\n\t}\n}\n\nfunc TestNetworkTransport_AppendEntries(t *testing.T) {\n\tfor _, useAddrProvider := range []bool{true, false} {\n\t\t// Transport 1 is consumer\n\t\ttrans1, err := makeTransport(t, useAddrProvider, \"localhost:0\")\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make 
the RPC request\n\t\targs := makeAppendRPC()\n\t\tresp := makeAppendRPCResponse()\n\n\t\t// Listen for a request\n\t\tgo func() {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\ttrans2, err := makeTransport(t, useAddrProvider, string(trans1.LocalAddr()))\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans2.Close() }()\n\n\t\tvar out AppendEntriesResponse\n\t\tif err := trans2.AppendEntries(\"id1\", trans1.LocalAddr(), &args, &out); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\t// Verify the response\n\t\tif !reflect.DeepEqual(resp, out) {\n\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", resp, out)\n\t\t}\n\n\t}\n}\n\nfunc TestNetworkTransport_AppendEntriesPipeline(t *testing.T) {\n\tfor _, useAddrProvider := range []bool{true, false} {\n\t\t// Transport 1 is consumer\n\t\ttrans1, err := makeTransport(t, useAddrProvider, \"localhost:0\")\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make the RPC request\n\t\targs := makeAppendRPC()\n\t\tresp := makeAppendRPCResponse()\n\n\t\t// Listen for a request\n\t\tgo func() {\n\t\t\tfor i := 0; i < 10; i++ {\n\t\t\t\tselect {\n\t\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t\t// Verify the command\n\t\t\t\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\t\tcase <-time.After(200 * 
time.Millisecond):\n\t\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\ttrans2, err := makeTransport(t, useAddrProvider, string(trans1.LocalAddr()))\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans2.Close() }()\n\t\tpipeline, err := trans2.AppendEntriesPipeline(\"id1\", trans1.LocalAddr())\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\tfor i := 0; i < 10; i++ {\n\t\t\tout := new(AppendEntriesResponse)\n\t\t\tif _, err := pipeline.AppendEntries(&args, out); err != nil {\n\t\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t\t}\n\t\t}\n\n\t\trespCh := pipeline.Consumer()\n\t\tfor i := 0; i < 10; i++ {\n\t\t\tselect {\n\t\t\tcase ready := <-respCh:\n\t\t\t\t// Verify the response\n\t\t\t\tif !reflect.DeepEqual(&resp, ready.Response()) {\n\t\t\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", &resp, ready.Response())\n\t\t\t\t}\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Fatalf(\"timeout\")\n\t\t\t}\n\t\t}\n\t\t_ = pipeline.Close()\n\n\t}\n}\n\nfunc TestNetworkTransport_AppendEntriesPipeline_CloseStreams(t *testing.T) {\n\t// Transport 1 is consumer\n\ttrans1, err := makeTransport(t, true, \"localhost:0\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans1.Close() }()\n\trpcCh := trans1.Consumer()\n\n\t// Make the RPC request\n\targs := makeAppendRPC()\n\tresp := makeAppendRPCResponse()\n\n\tshutdownCh := make(chan struct{})\n\tdefer close(shutdownCh)\n\n\t// Listen for a request\n\tgo func() {\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-shutdownCh:\n\t\t\t\treturn\n\t\t\t}\n\t\t}\n\t}()\n\n\t// Transport 2 makes outbound 
request\n\ttrans2, err := makeTransport(t, true, string(trans1.LocalAddr()))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans2.Close() }()\n\n\tfor _, cancelStreams := range []bool{true, false} {\n\t\tpipeline, err := trans2.AppendEntriesPipeline(\"id1\", trans1.LocalAddr())\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\tfor i := 0; i < 100; i++ {\n\t\t\t// On the last one, close the streams on the transport one.\n\t\t\tif cancelStreams && i == 10 {\n\t\t\t\ttrans1.CloseStreams()\n\t\t\t\ttime.Sleep(10 * time.Millisecond)\n\t\t\t}\n\n\t\t\tout := new(AppendEntriesResponse)\n\t\t\tif _, err := pipeline.AppendEntries(&args, out); err != nil {\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\n\t\tvar futureErr error\n\t\trespCh := pipeline.Consumer()\n\tOUTER:\n\t\tfor i := 0; i < 100; i++ {\n\t\t\tselect {\n\t\t\tcase ready := <-respCh:\n\t\t\t\tif err := ready.Error(); err != nil {\n\t\t\t\t\tfutureErr = err\n\t\t\t\t\tbreak OUTER\n\t\t\t\t}\n\n\t\t\t\t// Verify the response\n\t\t\t\tif !reflect.DeepEqual(&resp, ready.Response()) {\n\t\t\t\t\tt.Fatalf(\"command mismatch: %#v %#v %v\", &resp, ready.Response(), ready.Error())\n\t\t\t\t}\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Fatalf(\"timeout when cancel streams is %v\", cancelStreams)\n\t\t\t}\n\t\t}\n\n\t\tif cancelStreams && futureErr == nil {\n\t\t\tt.Fatalf(\"expected an error due to the streams being closed\")\n\t\t} else if !cancelStreams && futureErr != nil {\n\t\t\tt.Fatalf(\"unexpected error: %v\", futureErr)\n\t\t}\n\n\t\t_ = pipeline.Close()\n\t}\n}\n\nfunc TestNetworkTransport_AppendEntriesPipeline_MaxRPCsInFlight(t *testing.T) {\n\t// Test the important cases 0 (default to 2), 1 (disabled), 2 and \"some\"\n\tfor _, max := range []int{0, 1, 2, 10} {\n\t\tt.Run(fmt.Sprintf(\"max=%d\", max), func(t *testing.T) {\n\t\t\tconfig := &NetworkTransportConfig{\n\t\t\t\tMaxPool:         2,\n\t\t\t\tMaxRPCsInFlight: max,\n\t\t\t\tTimeout:         
time.Second,\n\t\t\t\t// Don't use test logger as the transport has multiple goroutines and\n\t\t\t\t// causes panics.\n\t\t\t\tServerAddressProvider: &testAddrProvider{\"localhost:0\"},\n\t\t\t}\n\n\t\t\t// Transport 1 is consumer\n\t\t\ttrans1, err := NewTCPTransportWithConfig(\"localhost:0\", nil, config)\n\t\t\trequire.NoError(t, err)\n\t\t\tdefer func() { _ = trans1.Close() }()\n\n\t\t\t// Make the RPC request\n\t\t\targs := makeAppendRPC()\n\t\t\tresp := makeAppendRPCResponse()\n\n\t\t\tctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)\n\t\t\tdefer cancel()\n\n\t\t\t// Transport 2 makes outbound request\n\t\t\tconfig.ServerAddressProvider = &testAddrProvider{string(trans1.LocalAddr())}\n\t\t\ttrans2, err := NewTCPTransportWithConfig(\"localhost:0\", nil, config)\n\t\t\trequire.NoError(t, err)\n\t\t\tdefer func() { _ = trans2.Close() }()\n\n\t\t\t// Kill the transports on the timeout to unblock. That means things that\n\t\t\t// shouldn't have blocked did block.\n\t\t\tgo func() {\n\t\t\t\t<-ctx.Done()\n\t\t\t\t_ = trans2.Close()\n\t\t\t\t_ = trans1.Close()\n\t\t\t}()\n\n\t\t\t// Attempt to pipeline\n\t\t\tpipeline, err := trans2.AppendEntriesPipeline(\"id1\", trans1.LocalAddr())\n\t\t\tif max == 1 {\n\t\t\t\t// Max == 1 implies no pipelining\n\t\t\t\trequire.EqualError(t, err, ErrPipelineReplicationNotSupported.Error())\n\t\t\t\treturn\n\t\t\t}\n\t\t\trequire.NoError(t, err)\n\n\t\t\texpectedMax := max\n\t\t\tif max == 0 {\n\t\t\t\t// Should have defaulted to 2\n\t\t\t\texpectedMax = 2\n\t\t\t}\n\n\t\t\tfor i := 0; i < expectedMax-1; i++ {\n\t\t\t\t// We should be able to send `max - 1` rpcs before `AppendEntries`\n\t\t\t\t// blocks. It blocks on the `max` one because it sends before pushing\n\t\t\t\t// to the chan. 
It will block forever when it does because nothing is\n\t\t\t\t// responding yet.\n\t\t\t\tout := new(AppendEntriesResponse)\n\t\t\t\t_, err := pipeline.AppendEntries(&args, out)\n\t\t\t\trequire.NoError(t, err)\n\t\t\t}\n\n\t\t\t// Verify the next send blocks without blocking test forever\n\t\t\terrCh := make(chan error, 1)\n\t\t\tgo func() {\n\t\t\t\tout := new(AppendEntriesResponse)\n\t\t\t\t_, err := pipeline.AppendEntries(&args, out)\n\t\t\t\terrCh <- err\n\t\t\t}()\n\n\t\t\tselect {\n\t\t\tcase err := <-errCh:\n\t\t\t\trequire.NoError(t, err)\n\t\t\t\tt.Fatalf(\"AppendEntries didn't block with %d in flight\", max)\n\t\t\tcase <-time.After(50 * time.Millisecond):\n\t\t\t\t// OK it's probably blocked or we got _really_ unlucky with scheduling!\n\t\t\t}\n\n\t\t\t// Verify that once we receive/respond another one can be sent.\n\t\t\trpc := <-trans1.Consumer()\n\t\t\trpc.Respond(resp, nil)\n\n\t\t\t// We also need to consume the response from the pipeline in case chan is\n\t\t\t// unbuffered (inflight is 2 or 1)\n\t\t\t<-pipeline.Consumer()\n\n\t\t\t// The last append should unblock once the response is received.\n\t\t\tselect {\n\t\t\tcase <-errCh:\n\t\t\t\t// OK\n\t\t\tcase <-time.After(50 * time.Millisecond):\n\t\t\t\tt.Fatalf(\"last append didn't unblock\")\n\t\t\t}\n\t\t})\n\t}\n}\n\nfunc TestNetworkTransport_RequestVote(t *testing.T) {\n\tfor _, useAddrProvider := range []bool{true, false} {\n\t\t// Transport 1 is consumer\n\t\ttrans1, err := makeTransport(t, useAddrProvider, \"localhost:0\")\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make the RPC request\n\t\targs := RequestVoteRequest{\n\t\t\tTerm:         20,\n\t\t\tLastLogIndex: 100,\n\t\t\tLastLogTerm:  19,\n\t\t\tRPCHeader:    RPCHeader{Addr: []byte(\"butters\")},\n\t\t}\n\n\t\tresp := RequestVoteResponse{\n\t\t\tTerm:    100,\n\t\t\tGranted: false,\n\t\t}\n\n\t\t// Listen for a request\n\t\tgo 
func() {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*RequestVoteRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t\treturn\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\ttrans2, err := makeTransport(t, useAddrProvider, string(trans1.LocalAddr()))\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans2.Close() }()\n\t\tvar out RequestVoteResponse\n\t\tif err := trans2.RequestVote(\"id1\", trans1.LocalAddr(), &args, &out); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\t// Verify the response\n\t\tif !reflect.DeepEqual(resp, out) {\n\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", resp, out)\n\t\t}\n\n\t}\n}\n\nfunc TestNetworkTransport_InstallSnapshot(t *testing.T) {\n\tfor _, useAddrProvider := range []bool{true, false} {\n\t\t// Transport 1 is consumer\n\t\ttrans1, err := makeTransport(t, useAddrProvider, \"localhost:0\")\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make the RPC request\n\t\targs := InstallSnapshotRequest{\n\t\t\tTerm:         10,\n\t\t\tLastLogIndex: 100,\n\t\t\tLastLogTerm:  9,\n\t\t\tPeers:        []byte(\"blah blah\"),\n\t\t\tSize:         10,\n\t\t\tRPCHeader:    RPCHeader{Addr: []byte(\"kyle\")},\n\t\t}\n\n\t\tresp := InstallSnapshotResponse{\n\t\t\tTerm:    10,\n\t\t\tSuccess: true,\n\t\t}\n\n\t\t// Listen for a request\n\t\tgo func() {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*InstallSnapshotRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, 
args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\t// Try to read the bytes\n\t\t\t\tbuf := make([]byte, 10)\n\t\t\t\t_, _ = rpc.Reader.Read(buf)\n\n\t\t\t\t// Compare\n\t\t\t\tif !bytes.Equal(buf, []byte(\"0123456789\")) {\n\t\t\t\t\tt.Errorf(\"bad buf %v\", buf)\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\ttrans2, err := makeTransport(t, useAddrProvider, string(trans1.LocalAddr()))\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t\tdefer func() { _ = trans2.Close() }()\n\t\t// Create a buffer\n\t\tbuf := bytes.NewBuffer([]byte(\"0123456789\"))\n\n\t\tvar out InstallSnapshotResponse\n\t\tif err := trans2.InstallSnapshot(\"id1\", trans1.LocalAddr(), &args, &out, buf); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\t// Verify the response\n\t\tif !reflect.DeepEqual(resp, out) {\n\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", resp, out)\n\t\t}\n\n\t}\n}\n\nfunc TestNetworkTransport_EncodeDecode(t *testing.T) {\n\t// Transport 1 is consumer\n\ttrans1, err := NewTCPTransportWithLogger(\"localhost:0\", nil, 2, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans1.Close() }()\n\n\tlocal := trans1.LocalAddr()\n\tenc := trans1.EncodePeer(\"id1\", local)\n\tdec := trans1.DecodePeer(enc)\n\n\tif dec != local {\n\t\tt.Fatalf(\"enc/dec fail: %v %v\", dec, local)\n\t}\n}\n\nfunc TestNetworkTransport_EncodeDecode_AddressProvider(t *testing.T) {\n\taddressOverride := \"localhost:11111\"\n\tconfig := &NetworkTransportConfig{MaxPool: 2, Timeout: time.Second, Logger: newTestLogger(t), ServerAddressProvider: &testAddrProvider{addressOverride}}\n\ttrans1, err := NewTCPTransportWithConfig(\"localhost:0\", nil, config)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans1.Close() }()\n\n\tlocal := 
trans1.LocalAddr()\n\tenc := trans1.EncodePeer(\"id1\", local)\n\tdec := trans1.DecodePeer(enc)\n\n\tif dec != ServerAddress(addressOverride) {\n\t\tt.Fatalf(\"enc/dec fail: %v %v\", dec, addressOverride)\n\t}\n}\n\nfunc TestNetworkTransport_PooledConn(t *testing.T) {\n\t// Transport 1 is consumer\n\ttrans1, err := NewTCPTransportWithLogger(\"localhost:0\", nil, 2, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans1.Close() }()\n\trpcCh := trans1.Consumer()\n\n\t// Make the RPC request\n\targs := AppendEntriesRequest{\n\t\tTerm:         10,\n\t\tPrevLogEntry: 100,\n\t\tPrevLogTerm:  4,\n\t\tEntries: []*Log{\n\t\t\t{\n\t\t\t\tIndex: 101,\n\t\t\t\tTerm:  4,\n\t\t\t\tType:  LogNoop,\n\t\t\t},\n\t\t},\n\t\tLeaderCommitIndex: 90,\n\t\tRPCHeader:         RPCHeader{Addr: []byte(\"cartman\")},\n\t}\n\n\tresp := AppendEntriesResponse{\n\t\tTerm:    4,\n\t\tLastLog: 90,\n\t\tSuccess: true,\n\t}\n\n\t// errCh is used to report errors from any of the goroutines\n\t// created in this test.\n\t// It is buffered as to not block.\n\terrCh := make(chan error, 100)\n\n\t// Listen for a request\n\tgo func() {\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\terrCh <- fmt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\treturn\n\t\t\t}\n\t\t}\n\t}()\n\n\t// Transport 2 makes outbound request, 3 conn pool\n\ttrans2, err := NewTCPTransportWithLogger(\"localhost:0\", nil, 3, time.Second, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = trans2.Close() }()\n\n\t// Create wait group\n\twg := &sync.WaitGroup{}\n\n\t// Try to do parallel appends, should stress the conn pool\n\tfor i := 0; i < 5; i++ 
{\n\t\twg.Add(1)\n\n\t\tgo func() {\n\t\t\tdefer wg.Done()\n\t\t\tvar out AppendEntriesResponse\n\t\t\tif err := trans2.AppendEntries(\"id1\", trans1.LocalAddr(), &args, &out); err != nil {\n\t\t\t\terrCh <- err\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\t// Verify the response\n\t\t\tif !reflect.DeepEqual(resp, out) {\n\t\t\t\terrCh <- fmt.Errorf(\"command mismatch: %#v %#v\", resp, out)\n\t\t\t\treturn\n\t\t\t}\n\t\t}()\n\t}\n\n\t// Wait for the routines to finish\n\twg.Wait()\n\n\t// Check if we received any errors from the above goroutines.\n\tif len(errCh) > 0 {\n\t\tt.Fatal(<-errCh)\n\t}\n\n\t// Check the conn pool size\n\taddr := trans1.LocalAddr()\n\tif len(trans2.connPool[addr]) != 3 {\n\t\tt.Fatalf(\"Expected 3 pooled conns!\")\n\t}\n}\n\nfunc makeTransport(t *testing.T, useAddrProvider bool, addressOverride string) (*NetworkTransport, error) {\n\tconfig := &NetworkTransportConfig{\n\t\tMaxPool: 2,\n\t\t// Setting this because older tests for pipelining were written when this\n\t\t// was a constant and block forever if it's not large enough.\n\t\tMaxRPCsInFlight: 130,\n\t\tTimeout:         time.Second,\n\t\tLogger:          newTestLogger(t),\n\t}\n\tif useAddrProvider {\n\t\tconfig.ServerAddressProvider = &testAddrProvider{addressOverride}\n\t}\n\treturn NewTCPTransportWithConfig(\"localhost:0\", nil, config)\n}\n\ntype testCountingWriter struct {\n\tt        *testing.T\n\tnumCalls *int32\n}\n\nfunc (tw testCountingWriter) Write(p []byte) (n int, err error) {\n\tatomic.AddInt32(tw.numCalls, 1)\n\tif !strings.Contains(string(p), \"failed to accept connection\") {\n\t\ttw.t.Error(\"did not receive expected log message\")\n\t}\n\ttw.t.Log(\"countingWriter:\", string(p))\n\treturn len(p), nil\n}\n\ntype testCountingStreamLayer struct {\n\tnumCalls *int32\n}\n\nfunc (sl testCountingStreamLayer) Accept() (net.Conn, error) {\n\t*sl.numCalls++\n\treturn nil, fmt.Errorf(\"intentional error in test\")\n}\n\nfunc (sl testCountingStreamLayer) Close() error {\n\treturn 
nil\n}\n\nfunc (sl testCountingStreamLayer) Addr() net.Addr {\n\tpanic(\"not needed\")\n}\n\nfunc (sl testCountingStreamLayer) Dial(address ServerAddress, timeout time.Duration) (net.Conn, error) {\n\treturn nil, fmt.Errorf(\"not needed\")\n}\n\n// TestNetworkTransport_ListenBackoff tests that Accept() errors in NetworkTransport#listen()\n// do not result in a tight loop and spam the log. We verify this here by counting the number\n// of calls against Accept() and the logger\nfunc TestNetworkTransport_ListenBackoff(t *testing.T) {\n\t// testTime is the amount of time we will allow NetworkTransport#listen() to run\n\t// This needs to be long enough that to verify that maxDelay is in force,\n\t// but not so long as to be obnoxious when running the test suite.\n\tconst testTime = 4 * time.Second\n\n\tvar numAccepts int32\n\tvar numLogs int32\n\tcountingWriter := testCountingWriter{t, &numLogs}\n\tcountingLogger := hclog.New(&hclog.LoggerOptions{\n\t\tName:   \"test\",\n\t\tOutput: countingWriter,\n\t\tLevel:  hclog.DefaultLevel,\n\t})\n\ttransport := NetworkTransport{\n\t\tlogger:     countingLogger,\n\t\tstream:     testCountingStreamLayer{&numAccepts},\n\t\tshutdownCh: make(chan struct{}),\n\t}\n\n\tgo transport.listen()\n\n\t// sleep (+yield) for testTime seconds before asking the accept loop to shut down\n\ttime.Sleep(testTime)\n\t_ = transport.Close()\n\n\t// Verify that the method exited (but without block this test)\n\t// maxDelay == 1s, so we will give the routine 1.25s to loop around and shut down.\n\tselect {\n\tcase <-transport.shutdownCh:\n\tcase <-time.After(1250 * time.Millisecond):\n\t\tt.Error(\"timed out waiting for NetworkTransport to shut down\")\n\t}\n\trequire.True(t, transport.shutdown)\n\n\t// In testTime==4s, we expect to loop approximately 12 times\n\t// with the following delays (in ms):\n\t//   0+5+10+20+40+80+160+320+640+1000+1000+1000 == 4275 ms\n\t// Too few calls suggests that the minDelay is not in force; too many calls suggests that 
the\n\t// maxDelay is not in force or that the back-off isn't working at all.\n\t// We'll leave a little flex; the important thing here is the asymptotic behavior.\n\t// If the minDelay or maxDelay in NetworkTransport#listen() are modified, this test may fail\n\t// and need to be adjusted.\n\trequire.True(t, numAccepts > 10)\n\trequire.True(t, numAccepts < 13)\n\trequire.True(t, numLogs > 10)\n\trequire.True(t, numLogs < 13)\n}\n"
  },
  {
    "path": "observer.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"sync/atomic\"\n\t\"time\"\n)\n\n// Observation is sent along the given channel to observers when an event occurs.\ntype Observation struct {\n\t// Raft holds the Raft instance generating the observation.\n\tRaft *Raft\n\t// Data holds observation-specific data. Possible types are\n\t// RequestVoteRequest\n\t// RaftState\n\t// PeerObservation\n\t// LeaderObservation\n\tData interface{}\n}\n\n// LeaderObservation is used for the data when leadership changes.\ntype LeaderObservation struct {\n\t// DEPRECATED The LeaderAddr field should now be used\n\tLeader     ServerAddress\n\tLeaderAddr ServerAddress\n\tLeaderID   ServerID\n}\n\n// PeerObservation is sent to observers when peers change.\ntype PeerObservation struct {\n\tRemoved bool\n\tPeer    Server\n}\n\n// FailedHeartbeatObservation is sent when a node fails to heartbeat with the leader\ntype FailedHeartbeatObservation struct {\n\tPeerID      ServerID\n\tLastContact time.Time\n}\n\n// ResumedHeartbeatObservation is sent when a node resumes to heartbeat with the leader following failures\ntype ResumedHeartbeatObservation struct {\n\tPeerID ServerID\n}\n\n// nextObserverId is used to provide a unique ID for each observer to aid in\n// deregistration.\nvar nextObserverID uint64\n\n// FilterFn is a function that can be registered in order to filter observations.\n// The function reports whether the observation should be included - if\n// it returns false, the observation will be filtered out.\ntype FilterFn func(o *Observation) bool\n\n// Observer describes what to do with a given observation.\ntype Observer struct {\n\t// numObserved and numDropped are performance counters for this observer.\n\t// 64 bit types must be 64 bit aligned to use with atomic operations on\n\t// 32 bit platforms, so keep them at the top of the struct.\n\tnumObserved uint64\n\tnumDropped  uint64\n\n\t// channel receives 
observations.\n\tchannel chan Observation\n\n\t// blocking, if true, will cause Raft to block when sending an observation\n\t// to this observer. This should generally be set to false.\n\tblocking bool\n\n\t// filter will be called to determine if an observation should be sent to\n\t// the channel.\n\tfilter FilterFn\n\n\t// id is the ID of this observer in the Raft map.\n\tid uint64\n}\n\n// NewObserver creates a new observer that can be registered\n// to make observations on a Raft instance. Observations\n// will be sent on the given channel if they satisfy the\n// given filter.\n//\n// If blocking is true, the observer will block when it can't\n// send on the channel, otherwise it may discard events.\nfunc NewObserver(channel chan Observation, blocking bool, filter FilterFn) *Observer {\n\treturn &Observer{\n\t\tchannel:  channel,\n\t\tblocking: blocking,\n\t\tfilter:   filter,\n\t\tid:       atomic.AddUint64(&nextObserverID, 1),\n\t}\n}\n\n// GetNumObserved returns the number of observations.\nfunc (or *Observer) GetNumObserved() uint64 {\n\treturn atomic.LoadUint64(&or.numObserved)\n}\n\n// GetNumDropped returns the number of dropped observations due to blocking.\nfunc (or *Observer) GetNumDropped() uint64 {\n\treturn atomic.LoadUint64(&or.numDropped)\n}\n\n// RegisterObserver registers a new observer.\nfunc (r *Raft) RegisterObserver(or *Observer) {\n\tr.observersLock.Lock()\n\tdefer r.observersLock.Unlock()\n\tr.observers[or.id] = or\n}\n\n// DeregisterObserver deregisters an observer.\nfunc (r *Raft) DeregisterObserver(or *Observer) {\n\tr.observersLock.Lock()\n\tdefer r.observersLock.Unlock()\n\tdelete(r.observers, or.id)\n}\n\n// observe sends an observation to every observer.\nfunc (r *Raft) observe(o interface{}) {\n\t// In general observers should not block. 
But in any case this isn't\n\t// disastrous as we only hold a read lock, which merely prevents\n\t// registration / deregistration of observers.\n\tr.observersLock.RLock()\n\tdefer r.observersLock.RUnlock()\n\tfor _, or := range r.observers {\n\t\t// It's wasteful to do this in the loop, but for the common case\n\t\t// where there are no observers we won't create any objects.\n\t\tob := Observation{Raft: r, Data: o}\n\t\tif or.filter != nil && !or.filter(&ob) {\n\t\t\tcontinue\n\t\t}\n\t\tif or.channel == nil {\n\t\t\tcontinue\n\t\t}\n\t\tif or.blocking {\n\t\t\tor.channel <- ob\n\t\t\tatomic.AddUint64(&or.numObserved, 1)\n\t\t} else {\n\t\t\tselect {\n\t\t\tcase or.channel <- ob:\n\t\t\t\tatomic.AddUint64(&or.numObserved, 1)\n\t\t\tdefault:\n\t\t\t\tatomic.AddUint64(&or.numDropped, 1)\n\t\t\t}\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "peersjson.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"encoding/json\"\n\t\"os\"\n)\n\n// ReadPeersJSON consumes a legacy peers.json file in the format of the old JSON\n// peer store and creates a new-style configuration structure. This can be used\n// to migrate this data or perform manual recovery when running protocol versions\n// that can interoperate with older, unversioned Raft servers. This should not be\n// used once server IDs are in use, because the old peers.json file didn't have\n// support for these, nor non-voter suffrage types.\nfunc ReadPeersJSON(path string) (Configuration, error) {\n\t// Read in the file.\n\tbuf, err := os.ReadFile(path)\n\tif err != nil {\n\t\treturn Configuration{}, err\n\t}\n\n\t// Parse it as JSON.\n\tvar peers []string\n\tdec := json.NewDecoder(bytes.NewReader(buf))\n\tif err := dec.Decode(&peers); err != nil {\n\t\treturn Configuration{}, err\n\t}\n\n\t// Map it into the new-style configuration structure. We can only specify\n\t// voter roles here, and the ID has to be the same as the address.\n\tvar configuration Configuration\n\tfor _, peer := range peers {\n\t\tserver := Server{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       ServerID(peer),\n\t\t\tAddress:  ServerAddress(peer),\n\t\t}\n\t\tconfiguration.Servers = append(configuration.Servers, server)\n\t}\n\n\t// We should only ingest valid configurations.\n\tif err := checkConfiguration(configuration); err != nil {\n\t\treturn Configuration{}, err\n\t}\n\treturn configuration, nil\n}\n\n// configEntry is used when decoding a new-style peers.json.\ntype configEntry struct {\n\t// ID is the ID of the server (a UUID, usually).\n\tID ServerID `json:\"id\"`\n\n\t// Address is the host:port of the server.\n\tAddress ServerAddress `json:\"address\"`\n\n\t// NonVoter controls the suffrage. 
We choose this sense so people\n\t// can leave this out and get a Voter by default.\n\tNonVoter bool `json:\"non_voter\"`\n}\n\n// ReadConfigJSON reads a new-style peers.json and returns a configuration\n// structure. This can be used to perform manual recovery when running protocol\n// versions that use server IDs.\nfunc ReadConfigJSON(path string) (Configuration, error) {\n\t// Read in the file.\n\tbuf, err := os.ReadFile(path)\n\tif err != nil {\n\t\treturn Configuration{}, err\n\t}\n\n\t// Parse it as JSON.\n\tvar peers []configEntry\n\tdec := json.NewDecoder(bytes.NewReader(buf))\n\tif err := dec.Decode(&peers); err != nil {\n\t\treturn Configuration{}, err\n\t}\n\n\t// Map it into the new-style configuration structure.\n\tvar configuration Configuration\n\tfor _, peer := range peers {\n\t\tsuffrage := Voter\n\t\tif peer.NonVoter {\n\t\t\tsuffrage = Nonvoter\n\t\t}\n\t\tserver := Server{\n\t\t\tSuffrage: suffrage,\n\t\t\tID:       peer.ID,\n\t\t\tAddress:  peer.Address,\n\t\t}\n\t\tconfiguration.Servers = append(configuration.Servers, server)\n\t}\n\n\t// We should only ingest valid configurations.\n\tif err := checkConfiguration(configuration); err != nil {\n\t\treturn Configuration{}, err\n\t}\n\treturn configuration, nil\n}\n"
  },
  {
    "path": "peersjson_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"os\"\n\t\"path/filepath\"\n\t\"reflect\"\n\t\"strings\"\n\t\"testing\"\n)\n\nfunc TestPeersJSON_BadConfiguration(t *testing.T) {\n\tvar err error\n\tvar base string\n\tbase, err = os.MkdirTemp(\"\", \"\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(base) }()\n\n\tpeers := filepath.Join(base, \"peers.json\")\n\tif err = os.WriteFile(peers, []byte(\"null\"), 0o666); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t_, err = ReadPeersJSON(peers)\n\tif err == nil || !strings.Contains(err.Error(), \"at least one voter\") {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n}\n\nfunc TestPeersJSON_ReadPeersJSON(t *testing.T) {\n\tvar err error\n\tvar base string\n\tbase, err = os.MkdirTemp(\"\", \"\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(base) }()\n\n\tcontent := []byte(`\n[\"127.0.0.1:123\",\n \"127.0.0.2:123\",\n \"127.0.0.3:123\"]\n`)\n\tpeers := filepath.Join(base, \"peers.json\")\n\tif err = os.WriteFile(peers, content, 0o666); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tvar configuration Configuration\n\tconfiguration, err = ReadPeersJSON(peers)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\texpected := Configuration{\n\t\tServers: []Server{\n\t\t\t{\n\t\t\t\tSuffrage: Voter,\n\t\t\t\tID:       ServerID(\"127.0.0.1:123\"),\n\t\t\t\tAddress:  ServerAddress(\"127.0.0.1:123\"),\n\t\t\t},\n\t\t\t{\n\t\t\t\tSuffrage: Voter,\n\t\t\t\tID:       ServerID(\"127.0.0.2:123\"),\n\t\t\t\tAddress:  ServerAddress(\"127.0.0.2:123\"),\n\t\t\t},\n\t\t\t{\n\t\t\t\tSuffrage: Voter,\n\t\t\t\tID:       ServerID(\"127.0.0.3:123\"),\n\t\t\t\tAddress:  ServerAddress(\"127.0.0.3:123\"),\n\t\t\t},\n\t\t},\n\t}\n\tif !reflect.DeepEqual(configuration, expected) {\n\t\tt.Fatalf(\"bad configuration: %+v != %+v\", configuration, expected)\n\t}\n}\n\nfunc 
TestPeersJSON_ReadConfigJSON(t *testing.T) {\n\tvar err error\n\tvar base string\n\tbase, err = os.MkdirTemp(\"\", \"\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(base) }()\n\n\tcontent := []byte(`\n[\n  {\n    \"id\": \"adf4238a-882b-9ddc-4a9d-5b6758e4159e\",\n    \"address\": \"127.0.0.1:123\",\n    \"non_voter\": false\n  },\n  {\n    \"id\": \"8b6dda82-3103-11e7-93ae-92361f002671\",\n    \"address\": \"127.0.0.2:123\"\n  },\n  {\n    \"id\": \"97e17742-3103-11e7-93ae-92361f002671\",\n    \"address\": \"127.0.0.3:123\",\n    \"non_voter\": true\n  }\n]\n`)\n\tpeers := filepath.Join(base, \"peers.json\")\n\tif err = os.WriteFile(peers, content, 0o666); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\tvar configuration Configuration\n\tconfiguration, err = ReadConfigJSON(peers)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\texpected := Configuration{\n\t\tServers: []Server{\n\t\t\t{\n\t\t\t\tSuffrage: Voter,\n\t\t\t\tID:       ServerID(\"adf4238a-882b-9ddc-4a9d-5b6758e4159e\"),\n\t\t\t\tAddress:  ServerAddress(\"127.0.0.1:123\"),\n\t\t\t},\n\t\t\t{\n\t\t\t\tSuffrage: Voter,\n\t\t\t\tID:       ServerID(\"8b6dda82-3103-11e7-93ae-92361f002671\"),\n\t\t\t\tAddress:  ServerAddress(\"127.0.0.2:123\"),\n\t\t\t},\n\t\t\t{\n\t\t\t\tSuffrage: Nonvoter,\n\t\t\t\tID:       ServerID(\"97e17742-3103-11e7-93ae-92361f002671\"),\n\t\t\t\tAddress:  ServerAddress(\"127.0.0.3:123\"),\n\t\t\t},\n\t\t},\n\t}\n\tif !reflect.DeepEqual(configuration, expected) {\n\t\tt.Fatalf(\"bad configuration: %+v != %+v\", configuration, expected)\n\t}\n}\n"
  },
  {
    "path": "progress.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"context\"\n\t\"io\"\n\t\"sync\"\n\t\"time\"\n\n\thclog \"github.com/hashicorp/go-hclog\"\n)\n\nconst (\n\tsnapshotRestoreMonitorInterval = 10 * time.Second\n)\n\ntype snapshotRestoreMonitor struct {\n\tlogger          hclog.Logger\n\tcr              CountingReader\n\tsize            int64\n\tnetworkTransfer bool\n\n\tonce   sync.Once\n\tcancel func()\n\tdoneCh chan struct{}\n}\n\nfunc startSnapshotRestoreMonitor(\n\tlogger hclog.Logger,\n\tcr CountingReader,\n\tsize int64,\n\tnetworkTransfer bool,\n) *snapshotRestoreMonitor {\n\tctx, cancel := context.WithCancel(context.Background())\n\n\tm := &snapshotRestoreMonitor{\n\t\tlogger:          logger,\n\t\tcr:              cr,\n\t\tsize:            size,\n\t\tnetworkTransfer: networkTransfer,\n\t\tcancel:          cancel,\n\t\tdoneCh:          make(chan struct{}),\n\t}\n\tgo m.run(ctx)\n\treturn m\n}\n\nfunc (m *snapshotRestoreMonitor) run(ctx context.Context) {\n\tdefer close(m.doneCh)\n\n\tticker := time.NewTicker(snapshotRestoreMonitorInterval)\n\tdefer ticker.Stop()\n\n\tranOnce := false\n\tfor {\n\t\tselect {\n\t\tcase <-ctx.Done():\n\t\t\tif !ranOnce {\n\t\t\t\tm.runOnce()\n\t\t\t}\n\t\t\treturn\n\t\tcase <-ticker.C:\n\t\t\tm.runOnce()\n\t\t\tranOnce = true\n\t\t}\n\t}\n}\n\nfunc (m *snapshotRestoreMonitor) runOnce() {\n\treadBytes := m.cr.Count()\n\tpct := float64(100*readBytes) / float64(m.size)\n\n\tmessage := \"snapshot restore progress\"\n\tif m.networkTransfer {\n\t\tmessage = \"snapshot network transfer progress\"\n\t}\n\n\tm.logger.Info(message,\n\t\t\"read-bytes\", readBytes,\n\t\t\"percent-complete\", hclog.Fmt(\"%0.2f%%\", pct),\n\t)\n}\n\nfunc (m *snapshotRestoreMonitor) StopAndWait() {\n\tm.once.Do(func() {\n\t\tm.cancel()\n\t\t<-m.doneCh\n\t})\n}\n\ntype CountingReader interface {\n\tio.Reader\n\tCount() int64\n}\n\ntype countingReader struct {\n\treader io.Reader\n\n\tmu    
sync.Mutex\n\tbytes int64\n}\n\nfunc (r *countingReader) Read(p []byte) (n int, err error) {\n\tn, err = r.reader.Read(p)\n\tr.mu.Lock()\n\tr.bytes += int64(n)\n\tr.mu.Unlock()\n\treturn n, err\n}\n\nfunc (r *countingReader) Count() int64 {\n\tr.mu.Lock()\n\tdefer r.mu.Unlock()\n\treturn r.bytes\n}\n\nfunc newCountingReader(r io.Reader) *countingReader {\n\treturn &countingReader{reader: r}\n}\n\ntype countingReadCloser struct {\n\t*countingReader\n\treadCloser io.ReadCloser\n}\n\nfunc newCountingReadCloser(rc io.ReadCloser) *countingReadCloser {\n\treturn &countingReadCloser{\n\t\tcountingReader: newCountingReader(rc),\n\t\treadCloser:     rc,\n\t}\n}\n\nfunc (c countingReadCloser) Close() error {\n\treturn c.readCloser.Close()\n}\n\nfunc (c countingReadCloser) WrappedReadCloser() io.ReadCloser {\n\treturn c.readCloser\n}\n\n// ReadCloserWrapper allows access to an underlying ReadCloser from a wrapper.\ntype ReadCloserWrapper interface {\n\tio.ReadCloser\n\tWrappedReadCloser() io.ReadCloser\n}\n\nvar _ ReadCloserWrapper = &countingReadCloser{}\n"
  },
  {
    "path": "raft-compat/go.mod",
    "content": "module github.com/hashicorp/raft/compat\n\ngo 1.20\n\nrequire github.com/stretchr/testify v1.11.1\n\nrequire (\n\tgithub.com/armon/go-metrics v0.4.1 // indirect\n\tgithub.com/fatih/color v1.13.0 // indirect\n\tgithub.com/hashicorp/go-hclog v1.6.3 // indirect\n\tgithub.com/hashicorp/go-immutable-radix v1.0.0 // indirect\n\tgithub.com/hashicorp/go-metrics v0.5.4 // indirect\n\tgithub.com/hashicorp/go-msgpack/v2 v2.1.2 // indirect\n\tgithub.com/hashicorp/golang-lru v0.5.0 // indirect\n\tgithub.com/mattn/go-colorable v0.1.12 // indirect\n\tgithub.com/mattn/go-isatty v0.0.14 // indirect\n\tgolang.org/x/sys v0.13.0 // indirect\n)\n\nreplace github.com/hashicorp/raft-previous-version => ./raft-previous-version\n\nreplace github.com/hashicorp/raft => ../\n\nrequire (\n\tgithub.com/davecgh/go-spew v1.1.1 // indirect\n\tgithub.com/hashicorp/raft v1.6.1\n\tgithub.com/hashicorp/raft-previous-version v1.2.0\n\tgithub.com/pmezard/go-difflib v1.0.0 // indirect\n\tgopkg.in/yaml.v3 v3.0.1 // indirect\n)\n"
  },
  {
    "path": "raft-compat/go.sum",
    "content": "cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=\ngithub.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=\ngithub.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=\ngithub.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=\ngithub.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho=\ngithub.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=\ngithub.com/armon/go-metrics v0.4.1/go.mod h1:E6amYzXo6aW1tqzoZGT755KkbgrJsSdpwZ+3JqfkOG4=\ngithub.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=\ngithub.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=\ngithub.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=\ngithub.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=\ngithub.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=\ngithub.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=\ngithub.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=\ngithub.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=\ngithub.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w=\ngithub.com/fatih/color v1.13.0/go.mod 
h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk=\ngithub.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=\ngithub.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=\ngithub.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=\ngithub.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=\ngithub.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A=\ngithub.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=\ngithub.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=\ngithub.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=\ngithub.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=\ngithub.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=\ngithub.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=\ngithub.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=\ngithub.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=\ngithub.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=\ngithub.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=\ngithub.com/google/go-cmp v0.4.0/go.mod 
h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=\ngithub.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=\ngithub.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=\ngithub.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=\ngithub.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=\ngithub.com/hashicorp/go-immutable-radix v1.0.0 h1:AKDB1HM5PWEA7i4nhcpwOrO2byshxBjXVn/J/3+z5/0=\ngithub.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=\ngithub.com/hashicorp/go-metrics v0.5.4 h1:8mmPiIJkTPPEbAiV97IxdAGNdRdaWwVap1BU6elejKY=\ngithub.com/hashicorp/go-metrics v0.5.4/go.mod h1:CG5yz4NZ/AI/aQt9Ucm/vdBnbh7fvmv4lxZ350i+QQI=\ngithub.com/hashicorp/go-msgpack/v2 v2.1.2 h1:4Ee8FTp834e+ewB71RDrQ0VKpyFdrKOjvYtnQ/ltVj0=\ngithub.com/hashicorp/go-msgpack/v2 v2.1.2/go.mod h1:upybraOAblm4S7rx0+jeNy+CWWhzywQsSRV5033mMu4=\ngithub.com/hashicorp/go-retryablehttp v0.5.3/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs=\ngithub.com/hashicorp/go-uuid v1.0.0 h1:RS8zrF7PhGwyNPOtxSClXXj9HA8feRnJzgnI1RJCSnM=\ngithub.com/hashicorp/go-uuid v1.0.0/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro=\ngithub.com/hashicorp/golang-lru v0.5.0 h1:CL2msUPvZTLb5O648aiLNJw3hnBxN2+1Jq8rCOH9wdo=\ngithub.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=\ngithub.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4=\ngithub.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=\ngithub.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.10/go.mod 
h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=\ngithub.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=\ngithub.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=\ngithub.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=\ngithub.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=\ngithub.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=\ngithub.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=\ngithub.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=\ngithub.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=\ngithub.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=\ngithub.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40=\ngithub.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=\ngithub.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=\ngithub.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=\ngithub.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=\ngithub.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=\ngithub.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod 
h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=\ngithub.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=\ngithub.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=\ngithub.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY=\ngithub.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=\ngithub.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=\ngithub.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=\ngithub.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=\ngithub.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=\ngithub.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=\ngithub.com/prometheus/client_golang v1.4.0/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=\ngithub.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=\ngithub.com/prometheus/client_golang v1.11.1/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=\ngithub.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=\ngithub.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=\ngithub.com/prometheus/common 
v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=\ngithub.com/prometheus/common v0.9.1/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=\ngithub.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=\ngithub.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=\ngithub.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=\ngithub.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=\ngithub.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=\ngithub.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=\ngithub.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA=\ngithub.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=\ngithub.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=\ngithub.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88=\ngithub.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=\ngithub.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=\ngithub.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=\ngithub.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=\ngithub.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals=\ngithub.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=\ngithub.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=\ngithub.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=\ngolang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod 
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=\ngolang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=\ngolang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=\ngolang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=\ngolang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=\ngolang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=\ngolang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=\ngolang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=\ngolang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=\ngolang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=\ngolang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod 
h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=\ngolang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=\ngolang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=\ngolang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=\ngolang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=\ngolang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=\ngolang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=\ngoogle.golang.org/appengine v1.4.0/go.mod 
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=\ngoogle.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=\ngoogle.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=\ngoogle.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=\ngoogle.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=\ngoogle.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=\ngoogle.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=\ngoogle.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=\ngopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=\ngopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=\ngopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=\ngopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=\ngopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=\ngopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=\n"
  },
  {
    "path": "raft-compat/prevote_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft_compat\n\nimport (\n\t\"github.com/hashicorp/raft\"\n\traftprevious \"github.com/hashicorp/raft-previous-version\"\n\t\"github.com/hashicorp/raft/compat/testcluster\"\n\t\"github.com/hashicorp/raft/compat/utils\"\n\t\"github.com/stretchr/testify/require\"\n\t\"testing\"\n\t\"time\"\n)\n\nfunc TestRaft_PreVote_BootStrap_PreVote(t *testing.T) {\n\tleaveTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\tif cluster.GetLeader().GetLocalID() == id {\n\t\t\ttransfer := cluster.Raft(id).(*raftprevious.Raft).LeadershipTransfer()\n\t\t\tutils.WaitFuture(t, transfer)\n\t\t}\n\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\tutils.WaitFuture(t, f)\n\t}\n\tleaveNoTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\tfr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0)\n\t\tutils.WaitFuture(t, fr)\n\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\tutils.WaitFuture(t, f)\n\t}\n\ttcs := []struct {\n\t\tname     string\n\t\tnumNodes int\n\t\tpreVote  bool\n\t\tLeave    func(t *testing.T, cluster testcluster.RaftCluster, id string)\n\t}{\n\t\t{\"no prevote -> prevote (leave transfer)\", 3, true, leaveTransfer},\n\t\t{\"no prevote -> prevote  (leave no transfer)\", 3, true, leaveNoTransfer},\n\t\t{\"no prevote -> prevote (leave transfer) 5\", 5, true, leaveTransfer},\n\t\t{\"no prevote -> prevote  (leave no transfer) 5\", 5, true, leaveNoTransfer},\n\t\t{\"no prevote -> no prevote (leave transfer)\", 3, false, leaveTransfer},\n\t\t{\"no prevote -> no prevote  (leave no transfer)\", 3, false, leaveNoTransfer},\n\t\t{\"no prevote -> no prevote (leave transfer) 5\", 5, false, leaveTransfer},\n\t\t{\"no prevote -> no prevote  (leave no transfer) 5\", 5, false, leaveNoTransfer},\n\t}\n\tfor _, tc := range tcs {\n\t\tt.Run(tc.name, func(t *testing.T) 
{\n\n\t\t\tcluster := testcluster.NewPreviousRaftCluster(t, tc.numNodes, \"raftNode\")\n\t\t\tconfiguration := raftprevious.Configuration{}\n\n\t\t\tfor i := 0; i < tc.numNodes; i++ {\n\t\t\t\tvar err error\n\t\t\t\trequire.NoError(t, err)\n\t\t\t\tconfiguration.Servers = append(configuration.Servers, raftprevious.Server{\n\t\t\t\t\tID:      raftprevious.ServerID(cluster.ID(i)),\n\t\t\t\t\tAddress: raftprevious.ServerAddress(cluster.Addr(i)),\n\t\t\t\t})\n\t\t\t}\n\t\t\traft0 := cluster.Raft(cluster.ID(0)).(*raftprevious.Raft)\n\t\t\tboot := raft0.BootstrapCluster(configuration)\n\t\t\tif err := boot.Error(); err != nil {\n\t\t\t\tt.Fatalf(\"bootstrap err: %v\", err)\n\t\t\t}\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tgetLeader := cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\t\t\ta, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, a)\n\t\t\tfuture := getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte(\"test\"), time.Second)\n\t\t\tutils.WaitFuture(t, future)\n\n\t\t\tleader, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, leader)\n\t\t\t// Upgrade all the followers\n\t\t\tfor i := 0; i < tc.numNodes; i++ {\n\t\t\t\tif getLeader.GetLocalID() == cluster.ID(i) {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\t// Check Leader haven't changed\n\t\t\t\ta, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\t\trequire.Equal(t, a, leader)\n\t\t\t\ttc.Leave(t, cluster, cluster.ID(i))\n\n\t\t\t\t// Keep the store, to be passed to the upgraded node.\n\t\t\t\tstore := cluster.Store(cluster.ID(i))\n\t\t\t\tid := cluster.ID(i)\n\n\t\t\t\t//Delete the node from the cluster\n\t\t\t\tcluster.DeleteNode(cluster.ID(i))\n\n\t\t\t\t//Create an upgraded node with the store\n\t\t\t\trUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) {\n\t\t\t\t\tconfig.PreVoteDisabled = !tc.preVote\n\t\t\t\t})\n\t\t\t\tfuture := 
getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0)\n\t\t\t\tutils.WaitFuture(t, future)\n\t\t\t\t//Add the new node to the cluster\n\t\t\t\tcluster.AddNode(rUIT)\n\n\t\t\t\t// Wait enough to have the configuration propagated.\n\t\t\t\ttime.Sleep(time.Second)\n\n\t\t\t\t//Apply some logs\n\t\t\t\tfuture = getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte(\"test2\"), time.Second)\n\t\t\t\trequire.NoError(t, future.Error())\n\n\t\t\t\t// Check Leader haven't changed as we haven't replaced the leader yet\n\t\t\t\ta, _ = getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\t\trequire.Equal(t, a, leader)\n\t\t\t}\n\t\t\t// keep a reference to the store\n\t\t\tstore := cluster.Store(getLeader.GetLocalID())\n\t\t\tid := getLeader.GetLocalID()\n\n\t\t\t//Remove and shutdown the leader node\n\t\t\ttc.Leave(t, cluster, getLeader.GetLocalID())\n\n\t\t\t// Delete the old leader node from the cluster\n\t\t\tcluster.DeleteNode(getLeader.GetLocalID())\n\t\t\toldLeaderID := getLeader.GetLocalID()\n\n\t\t\t// Wait for a new leader to be elected\n\t\t\tutils.WaitForNewLeader(t, oldLeaderID, cluster)\n\t\t\tgetLeader = cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\n\t\t\t// Create a new node to replace the deleted one\n\t\t\trUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) { config.PreVoteDisabled = false })\n\t\t\tfa := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 0)\n\t\t\tutils.WaitFuture(t, fa)\n\n\t\t\t// Wait for new leader, (this happens because of not having prevote)\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tnewLeaderID := rUIT.GetLeaderID()\n\t\t\trequire.NotEmpty(t, newLeaderID)\n\n\t\t\trequire.NotEqual(t, newLeaderID, leader)\n\n\t\t\tnewLeader := cluster.GetLeader()\n\t\t\t//Apply some logs\n\t\t\tfuture = 
newLeader.GetRaft().(*raft.Raft).Apply([]byte(\"test2\"), time.Second)\n\t\t\trequire.NoError(t, future.Error())\n\n\t\t\t// Check Leader haven't changed as we haven't replaced the leader yet\n\t\t\tnewAddr, _ := newLeader.GetRaft().(*raft.Raft).LeaderWithID()\n\t\t\trequire.Equal(t, string(newAddr), newLeader.GetLocalAddr())\n\n\t\t\trequire.Equal(t, tc.numNodes, rUIT.NumLogs())\n\t\t})\n\t}\n\n}\n\nfunc TestRaft_PreVote_Rollback(t *testing.T) {\n\tleaveTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\tif cluster.GetLeader().GetLocalID() == id {\n\t\t\ttransfer := cluster.Raft(id).(*raft.Raft).LeadershipTransfer()\n\t\t\tutils.WaitFuture(t, transfer)\n\t\t}\n\t\tf := cluster.Raft(id).(*raft.Raft).Shutdown()\n\t\tutils.WaitFuture(t, f)\n\t}\n\tleaveNoTransfer := func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\tfr := cluster.GetLeader().GetRaft().(*raft.Raft).RemoveServer(raft.ServerID(id), 0, 0)\n\t\tutils.WaitFuture(t, fr)\n\t\tf := cluster.Raft(id).(*raft.Raft).Shutdown()\n\t\tutils.WaitFuture(t, f)\n\t}\n\ttcs := []struct {\n\t\tname     string\n\t\tnumNodes int\n\t\tpreVote  bool\n\t\tLeave    func(t *testing.T, cluster testcluster.RaftCluster, id string)\n\t}{\n\t\t{\"no prevote -> prevote (leave transfer)\", 3, true, leaveTransfer},\n\t\t{\"no prevote -> prevote  (leave no transfer)\", 3, true, leaveNoTransfer},\n\t\t{\"no prevote -> prevote (leave transfer) 5\", 5, true, leaveTransfer},\n\t\t{\"no prevote -> prevote  (leave no transfer) 5\", 5, true, leaveNoTransfer},\n\t\t{\"no prevote -> no prevote (leave transfer)\", 3, false, leaveTransfer},\n\t\t{\"no prevote -> no prevote  (leave no transfer)\", 3, false, leaveNoTransfer},\n\t\t{\"no prevote -> no prevote (leave transfer) 5\", 5, false, leaveTransfer},\n\t\t{\"no prevote -> no prevote  (leave no transfer) 5\", 5, false, leaveNoTransfer},\n\t}\n\tfor _, tc := range tcs {\n\t\tt.Run(tc.name, func(t *testing.T) {\n\n\t\t\tcluster := 
testcluster.NewUITRaftCluster(t, tc.numNodes, \"raftIUTNode\")\n\t\t\tconfiguration := raft.Configuration{}\n\n\t\t\tfor i := 0; i < tc.numNodes; i++ {\n\t\t\t\tvar err error\n\t\t\t\trequire.NoError(t, err)\n\t\t\t\tconfiguration.Servers = append(configuration.Servers, raft.Server{\n\t\t\t\t\tID:      raft.ServerID(cluster.ID(i)),\n\t\t\t\t\tAddress: raft.ServerAddress(cluster.Addr(i)),\n\t\t\t\t})\n\t\t\t}\n\t\t\traft0 := cluster.Raft(cluster.ID(0)).(*raft.Raft)\n\t\t\tboot := raft0.BootstrapCluster(configuration)\n\t\t\tif err := boot.Error(); err != nil {\n\t\t\t\tt.Fatalf(\"bootstrap err: %v\", err)\n\t\t\t}\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tgetLeader := cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\t\t\ta, _ := getLeader.GetRaft().(*raft.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, a)\n\t\t\tfuture := getLeader.GetRaft().(*raft.Raft).Apply([]byte(\"test\"), time.Second)\n\t\t\tutils.WaitFuture(t, future)\n\n\t\t\tleader, _ := getLeader.GetRaft().(*raft.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, leader)\n\t\t\t// Upgrade all the followers\n\t\t\tfor i := 0; i < tc.numNodes; i++ {\n\t\t\t\tif getLeader.GetLocalID() == cluster.ID(i) {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\t// Check Leader haven't changed\n\t\t\t\ta, _ := getLeader.GetRaft().(*raft.Raft).LeaderWithID()\n\t\t\t\trequire.Equal(t, a, leader)\n\t\t\t\ttc.Leave(t, cluster, cluster.ID(i))\n\n\t\t\t\t// Keep the store, to be passed to the upgraded node.\n\t\t\t\tstore := cluster.Store(cluster.ID(i))\n\t\t\t\tid := cluster.ID(i)\n\n\t\t\t\t//Delete the node from the cluster\n\t\t\t\tcluster.DeleteNode(cluster.ID(i))\n\n\t\t\t\t//Create an upgraded node with the store\n\t\t\t\trUIT := testcluster.InitPreviousWithStore(t, id, store.(*raft.InmemStore), func(config *raftprevious.Config) {\n\t\t\t\t})\n\t\t\t\tfuture := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 
0)\n\t\t\t\tutils.WaitFuture(t, future)\n\t\t\t\t//Add the new node to the cluster\n\t\t\t\tcluster.AddNode(rUIT)\n\n\t\t\t\t// Wait enough to have the configuration propagated.\n\t\t\t\ttime.Sleep(time.Second)\n\n\t\t\t\t//Apply some logs\n\t\t\t\tfuture = getLeader.GetRaft().(*raft.Raft).Apply([]byte(\"test2\"), time.Second)\n\t\t\t\trequire.NoError(t, future.Error())\n\n\t\t\t\t// Check Leader haven't changed as we haven't replaced the leader yet\n\t\t\t\ta, _ = getLeader.GetRaft().(*raft.Raft).LeaderWithID()\n\t\t\t\trequire.Equal(t, a, leader)\n\t\t\t}\n\t\t\t// keep a reference to the store\n\t\t\tstore := cluster.Store(getLeader.GetLocalID())\n\t\t\tid := getLeader.GetLocalID()\n\n\t\t\t//Remove and shutdown the leader node\n\t\t\ttc.Leave(t, cluster, getLeader.GetLocalID())\n\n\t\t\t// Delete the old leader node from the cluster\n\t\t\tcluster.DeleteNode(getLeader.GetLocalID())\n\t\t\toldLeaderID := getLeader.GetLocalID()\n\n\t\t\t// Wait for a new leader to be elected\n\t\t\tutils.WaitForNewLeader(t, oldLeaderID, cluster)\n\t\t\tgetLeader = cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\n\t\t\t// Create a new node to replace the deleted one\n\t\t\trUIT := testcluster.InitPreviousWithStore(t, id, store.(*raft.InmemStore), func(config *raftprevious.Config) {})\n\t\t\tfa := getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0)\n\t\t\tutils.WaitFuture(t, fa)\n\n\t\t\t// Wait for new leader, (this happens because of not having prevote)\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tnewLeaderID := rUIT.GetLeaderID()\n\t\t\trequire.NotEmpty(t, newLeaderID)\n\n\t\t\trequire.NotEqual(t, newLeaderID, leader)\n\n\t\t\tnewLeader := cluster.GetLeader()\n\t\t\t//Apply some logs\n\t\t\tfuture = newLeader.GetRaft().(*raftprevious.Raft).Apply([]byte(\"test2\"), time.Second)\n\t\t\trequire.NoError(t, future.Error())\n\n\t\t\t// Check Leader haven't changed as we 
haven't replaced the leader yet\n\t\t\tnewAddr, _ := newLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.Equal(t, string(newAddr), newLeader.GetLocalAddr())\n\n\t\t\trequire.Equal(t, tc.numNodes, rUIT.NumLogs())\n\t\t})\n\t}\n\n}\n"
  },
  {
    "path": "raft-compat/rolling_upgrade_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft_compat\n\nimport (\n\t\"fmt\"\n\t\"github.com/hashicorp/raft\"\n\traftprevious \"github.com/hashicorp/raft-previous-version\"\n\t\"github.com/hashicorp/raft/compat/testcluster\"\n\t\"github.com/hashicorp/raft/compat/utils\"\n\t\"github.com/stretchr/testify/require\"\n\t\"testing\"\n\t\"time\"\n)\n\n// TestRaft_RollingUpgrade This test perform a rolling upgrade by adding a new node,\n// wait for it to join the cluster and remove one of the old nodes, until all nodes\n// are cycled\nfunc TestRaft_RollingUpgrade(t *testing.T) {\n\ttcs := []struct {\n\t\tName  string\n\t\tLeave func(t *testing.T, cluster testcluster.RaftCluster, id string)\n\t}{\n\t\t{\n\t\t\tName: \"leave before shutdown\",\n\t\t\tLeave: func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\t\t\tfr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0)\n\t\t\t\tutils.WaitFuture(t, fr)\n\t\t\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\t\t\tutils.WaitFuture(t, f)\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tName: \"leader transfer\",\n\t\t\tLeave: func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\t\t\tif cluster.GetLeader().GetLocalID() == id {\n\t\t\t\t\ttransfer := cluster.Raft(id).(*raftprevious.Raft).LeadershipTransfer()\n\t\t\t\t\tutils.WaitFuture(t, transfer)\n\t\t\t\t\tutils.WaitForNewLeader(t, id, cluster)\n\t\t\t\t}\n\t\t\t\tswitch cluster.GetLeader().GetRaft().(type) {\n\t\t\t\tcase *raftprevious.Raft:\n\t\t\t\t\tfr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0)\n\t\t\t\t\tutils.WaitFuture(t, fr)\n\t\t\t\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\t\t\t\tutils.WaitFuture(t, f)\n\t\t\t\tcase *raft.Raft:\n\t\t\t\t\tfr := cluster.GetLeader().GetRaft().(*raft.Raft).RemoveServer(raft.ServerID(id), 0, 0)\n\t\t\t\t\tutils.WaitFuture(t, 
fr)\n\t\t\t\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\t\t\t\tutils.WaitFuture(t, f)\n\t\t\t\t}\n\n\t\t\t},\n\t\t},\n\t}\n\n\tfor _, tc := range tcs {\n\t\tt.Run(tc.Name, func(t *testing.T) {\n\t\t\tinitCount := 3\n\t\t\tcluster := testcluster.NewPreviousRaftCluster(t, initCount, \"raftNode\")\n\t\t\tconfiguration := raftprevious.Configuration{}\n\n\t\t\tfor i := 0; i < initCount; i++ {\n\t\t\t\tvar err error\n\t\t\t\trequire.NoError(t, err)\n\t\t\t\tconfiguration.Servers = append(configuration.Servers, raftprevious.Server{\n\t\t\t\t\tID:      raftprevious.ServerID(cluster.ID(i)),\n\t\t\t\t\tAddress: raftprevious.ServerAddress(cluster.Addr(i)),\n\t\t\t\t})\n\t\t\t}\n\t\t\traft0 := cluster.Raft(cluster.ID(0)).(*raftprevious.Raft)\n\t\t\tboot := raft0.BootstrapCluster(configuration)\n\t\t\tif err := boot.Error(); err != nil {\n\t\t\t\tt.Fatalf(\"bootstrap err: %v\", err)\n\t\t\t}\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tgetLeader := cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\t\t\ta, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, a)\n\t\t\tfuture := getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte(\"test\"), time.Second)\n\t\t\tutils.WaitFuture(t, future)\n\n\t\t\tleader, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, leader)\n\t\t\t// Upgrade all the followers\n\t\t\tleaderIdx := 0\n\n\t\t\tfollowers := make([]string, 0)\n\t\t\tfor i := 0; i < initCount; i++ {\n\t\t\t\tif getLeader.GetLocalID() == cluster.ID(i) {\n\t\t\t\t\tleaderIdx = i\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t\tfollowers = append(followers, cluster.ID(i))\n\t\t\t}\n\n\t\t\tfor _, f := range followers {\n\t\t\t\trequire.NotEqual(t, f, getLeader.GetLocalID())\n\t\t\t\t// Check Leader haven't changed\n\t\t\t\ta, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\t\trequire.Equal(t, a, leader)\n\n\t\t\t\t//Create an upgraded node with the 
store\n\t\t\t\trUIT := testcluster.InitUIT(t, fmt.Sprintf(\"%s-new\", f))\n\t\t\t\tfuture := getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0)\n\t\t\t\tutils.WaitFuture(t, future)\n\n\t\t\t\t//Add the new node to the cluster\n\n\t\t\t\ttc.Leave(t, cluster, f)\n\n\t\t\t\t//Delete the node from the cluster\n\t\t\t\tcluster.AddNode(rUIT)\n\t\t\t\tcluster.DeleteNode(f)\n\t\t\t}\n\n\t\t\t// Wait enough to have the configuration propagated.\n\t\t\ttime.Sleep(time.Second)\n\n\t\t\t//Apply some logs\n\t\t\tfuture = getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte(\"test2\"), time.Second)\n\t\t\trequire.NoError(t, future.Error())\n\n\t\t\t// Check Leader haven't changed as we haven't replaced the leader yet\n\t\t\ta, _ = getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.Equal(t, a, leader)\n\n\t\t\t//Remove and shutdown the leader node\n\t\t\ttc.Leave(t, cluster, getLeader.GetLocalID())\n\n\t\t\t// Delete the old leader node from the cluster\n\n\t\t\toldLeaderID := getLeader.GetLocalID()\n\n\t\t\t// Wait for a new leader to be elected\n\t\t\tutils.WaitForNewLeader(t, oldLeaderID, cluster)\n\t\t\tgetLeader = cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\n\t\t\t// Create a new node to replace the deleted one\n\t\t\trUIT := testcluster.InitUIT(t, fmt.Sprintf(\"raftNew-%d\", leaderIdx))\n\t\t\tfa := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 0)\n\t\t\tutils.WaitFuture(t, fa)\n\n\t\t\t// Wait for new leader, (this happens because of not having prevote)\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tnewLeader := rUIT.GetLeaderID()\n\t\t\trequire.NotEmpty(t, newLeader)\n\t\t\trequire.NotEqual(t, newLeader, leader)\n\n\t\t\tcluster.DeleteNode(getLeader.GetLocalID())\n\t\t\trequire.Equal(t, rUIT.NumLogs(), 2)\n\t\t})\n\t}\n}\n\n// TestRaft_ReplaceUpgrade This test 
perform a rolling upgrade by removing an old node,\n// and create a new node with the same store until all old nodes are cycled to new nodes.\n// This simulate the advised way of upgrading in Consul.\nfunc TestRaft_ReplaceUpgrade(t *testing.T) {\n\n\ttcs := []struct {\n\t\tName  string\n\t\tLeave func(t *testing.T, cluster testcluster.RaftCluster, id string)\n\t}{\n\t\t{\n\t\t\tName: \"leave before shutdown\",\n\t\t\tLeave: func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\t\t\tfr := cluster.GetLeader().GetRaft().(*raftprevious.Raft).RemoveServer(raftprevious.ServerID(id), 0, 0)\n\t\t\t\tutils.WaitFuture(t, fr)\n\t\t\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\t\t\tutils.WaitFuture(t, f)\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tName: \"shutdown without leave\",\n\t\t\tLeave: func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\t\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\t\t\tutils.WaitFuture(t, f)\n\t\t\t},\n\t\t},\n\t\t{\n\t\t\tName: \"leader transfer\",\n\t\t\tLeave: func(t *testing.T, cluster testcluster.RaftCluster, id string) {\n\t\t\t\tif cluster.GetLeader().GetLocalID() == id {\n\t\t\t\t\ttransfer := cluster.Raft(id).(*raftprevious.Raft).LeadershipTransfer()\n\t\t\t\t\tutils.WaitFuture(t, transfer)\n\t\t\t\t}\n\t\t\t\tf := cluster.Raft(id).(*raftprevious.Raft).Shutdown()\n\t\t\t\tutils.WaitFuture(t, f)\n\t\t\t},\n\t\t},\n\t}\n\n\tfor _, tc := range tcs {\n\t\tt.Run(tc.Name, func(t *testing.T) {\n\t\t\tinitCount := 3\n\t\t\tcluster := testcluster.NewPreviousRaftCluster(t, initCount, \"raftNode\")\n\t\t\tconfiguration := raftprevious.Configuration{}\n\n\t\t\tfor i := 0; i < initCount; i++ {\n\t\t\t\tvar err error\n\t\t\t\trequire.NoError(t, err)\n\t\t\t\tconfiguration.Servers = append(configuration.Servers, raftprevious.Server{\n\t\t\t\t\tID:      raftprevious.ServerID(cluster.ID(i)),\n\t\t\t\t\tAddress: raftprevious.ServerAddress(cluster.Addr(i)),\n\t\t\t\t})\n\t\t\t}\n\t\t\traft0 := 
cluster.Raft(cluster.ID(0)).(*raftprevious.Raft)\n\t\t\tboot := raft0.BootstrapCluster(configuration)\n\t\t\tif err := boot.Error(); err != nil {\n\t\t\t\tt.Fatalf(\"bootstrap err: %v\", err)\n\t\t\t}\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tgetLeader := cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\t\t\ta, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, a)\n\t\t\tfuture := getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte(\"test\"), time.Second)\n\t\t\tutils.WaitFuture(t, future)\n\n\t\t\tleader, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.NotEmpty(t, leader)\n\t\t\t// Upgrade all the followers\n\t\t\tfor i := 0; i < initCount; i++ {\n\t\t\t\tif getLeader.GetLocalID() == cluster.ID(i) {\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\t// Check Leader haven't changed\n\t\t\t\ta, _ := getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\t\trequire.Equal(t, a, leader)\n\n\t\t\t\t//\n\t\t\t\ttc.Leave(t, cluster, cluster.ID(i))\n\n\t\t\t\t// Keep the store, to be passed to the upgraded node.\n\t\t\t\tstore := cluster.Store(cluster.ID(i))\n\t\t\t\tid := cluster.ID(i)\n\n\t\t\t\t//Delete the node from the cluster\n\t\t\t\tcluster.DeleteNode(cluster.ID(i))\n\n\t\t\t\t//Create an upgraded node with the store\n\t\t\t\trUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) {})\n\t\t\t\tfuture := getLeader.GetRaft().(*raftprevious.Raft).AddVoter(raftprevious.ServerID(rUIT.GetLocalID()), raftprevious.ServerAddress(rUIT.GetLocalAddr()), 0, 0)\n\t\t\t\tutils.WaitFuture(t, future)\n\t\t\t\t//Add the new node to the cluster\n\t\t\t\tcluster.AddNode(rUIT)\n\t\t\t}\n\n\t\t\t// Wait enough to have the configuration propagated.\n\t\t\ttime.Sleep(time.Second)\n\n\t\t\t//Apply some logs\n\t\t\tfuture = getLeader.GetRaft().(*raftprevious.Raft).Apply([]byte(\"test2\"), time.Second)\n\t\t\trequire.NoError(t, future.Error())\n\n\t\t\t// 
Check Leader haven't changed as we haven't replaced the leader yet\n\t\t\ta, _ = getLeader.GetRaft().(*raftprevious.Raft).LeaderWithID()\n\t\t\trequire.Equal(t, a, leader)\n\n\t\t\t// keep a reference to the store\n\t\t\tstore := cluster.Store(getLeader.GetLocalID())\n\t\t\tid := getLeader.GetLocalID()\n\n\t\t\t//Remove and shutdown the leader node\n\t\t\ttc.Leave(t, cluster, getLeader.GetLocalID())\n\n\t\t\t// Delete the old leader node from the cluster\n\t\t\tcluster.DeleteNode(getLeader.GetLocalID())\n\t\t\toldLeaderID := getLeader.GetLocalID()\n\n\t\t\t// Wait for a new leader to be elected\n\t\t\tutils.WaitForNewLeader(t, oldLeaderID, cluster)\n\t\t\tgetLeader = cluster.GetLeader()\n\t\t\trequire.NotEmpty(t, getLeader)\n\n\t\t\t// Create a new node to replace the deleted one\n\t\t\trUIT := testcluster.InitUITWithStore(t, id, store.(*raftprevious.InmemStore), func(config *raft.Config) {})\n\t\t\tfa := getLeader.GetRaft().(*raft.Raft).AddVoter(raft.ServerID(rUIT.GetLocalID()), raft.ServerAddress(rUIT.GetLocalAddr()), 0, 0)\n\t\t\tutils.WaitFuture(t, fa)\n\n\t\t\t// Wait for new leader, (this happens because of not having prevote)\n\t\t\tutils.WaitForNewLeader(t, \"\", cluster)\n\t\t\tnewLeader := rUIT.GetLeaderID()\n\t\t\trequire.NotEmpty(t, newLeader)\n\n\t\t\trequire.NotEqual(t, newLeader, leader)\n\n\t\t\trequire.Equal(t, rUIT.NumLogs(), 2)\n\t\t})\n\t}\n}\n"
  },
  {
    "path": "raft-compat/testcluster/cluster.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage testcluster\n\nimport (\n\t\"fmt\"\n\t\"github.com/hashicorp/raft\"\n\traftprevious \"github.com/hashicorp/raft-previous-version\"\n\t\"github.com/stretchr/testify/require\"\n\t\"testing\"\n\t\"time\"\n)\n\ntype RaftUIT struct {\n\traft   *raft.Raft\n\ttrans  *raft.NetworkTransport\n\tConfig *raft.Config\n\tStore  *raft.InmemStore\n\tSnap   *raft.InmemSnapshotStore\n\tid     raft.ServerID\n\tfsm    *raft.MockFSM\n}\n\nfunc (r RaftUIT) NumLogs() int {\n\treturn len(r.fsm.Logs())\n}\n\nfunc (r RaftUIT) GetLocalAddr() string {\n\treturn string(r.trans.LocalAddr())\n}\n\nfunc (r RaftUIT) GetRaft() interface{} {\n\treturn r.raft\n}\n\nfunc (r RaftUIT) GetStore() interface{} {\n\treturn r.Store\n}\n\nfunc (r RaftUIT) GetLocalID() string {\n\treturn string(r.id)\n}\n\nfunc (r RaftUIT) GetLeaderID() string {\n\t_, id := r.raft.LeaderWithID()\n\treturn string(id)\n}\n\nfunc (r *RaftCluster) ID(i int) string {\n\treturn r.rafts[i].GetLocalID()\n}\nfunc (r *RaftCluster) Addr(i int) string {\n\treturn r.rafts[i].GetLocalAddr()\n}\n\nfunc (r *RaftCluster) Raft(id string) interface{} {\n\ti := r.GetIndex(id)\n\treturn r.rafts[i].GetRaft()\n}\n\nfunc (r *RaftCluster) Store(id string) interface{} {\n\ti := r.GetIndex(id)\n\treturn r.rafts[i].GetStore()\n}\n\ntype RaftLatest struct {\n\traft   *raftprevious.Raft\n\ttrans  *raftprevious.NetworkTransport\n\tConfig *raftprevious.Config\n\tStore  *raftprevious.InmemStore\n\tSnap   *raftprevious.InmemSnapshotStore\n\tid     raftprevious.ServerID\n\tfsm    *raftprevious.MockFSM\n}\n\nfunc (r RaftLatest) NumLogs() int {\n\treturn len(r.fsm.Logs())\n}\n\nfunc (r RaftLatest) GetLocalAddr() string {\n\treturn string(r.trans.LocalAddr())\n}\n\nfunc (r RaftLatest) GetRaft() interface{} {\n\treturn r.raft\n}\nfunc (r RaftLatest) GetStore() interface{} {\n\treturn r.Store\n}\n\nfunc (r RaftLatest) GetLocalID() string {\n\treturn 
string(r.id)\n}\n\nfunc (r RaftLatest) GetLeaderID() string {\n\t_, id := r.raft.LeaderWithID()\n\treturn string(id)\n}\n\ntype RaftNode interface {\n\tGetLocalID() string\n\tGetLocalAddr() string\n\tGetLeaderID() string\n\tGetRaft() interface{}\n\tGetStore() interface{}\n\tNumLogs() int\n}\n\ntype RaftCluster struct {\n\trafts []RaftNode\n}\n\nfunc NewRaftCluster(t *testing.T, f func(t *testing.T, id string) RaftNode, count int, name string) RaftCluster {\n\trc := RaftCluster{}\n\trc.rafts = make([]RaftNode, count)\n\tfor i := 0; i < count; i++ {\n\t\trc.rafts[i] = f(t, fmt.Sprintf(\"%s-%d\", name, i))\n\t}\n\treturn rc\n}\n\nfunc NewPreviousRaftCluster(t *testing.T, count int, name string) RaftCluster {\n\treturn NewRaftCluster(t, InitPrevious, count, name)\n}\n\nfunc NewUITRaftCluster(t *testing.T, count int, name string) RaftCluster {\n\treturn NewRaftCluster(t, InitUIT, count, name)\n}\n\nfunc (r *RaftCluster) GetLeader() RaftNode {\n\tfor _, n := range r.rafts {\n\t\tif n.GetLocalID() == n.GetLeaderID() {\n\t\t\treturn n\n\t\t}\n\t}\n\treturn nil\n}\n\nfunc (r *RaftCluster) Len() int {\n\treturn len(r.rafts)\n}\n\nfunc (r *RaftCluster) AddNode(node RaftNode) {\n\tr.rafts = append([]RaftNode{node}, r.rafts...)\n}\n\nfunc (r *RaftCluster) DeleteNode(id string) {\n\ti := r.GetIndex(id)\n\tr.rafts = append(r.rafts[:i], r.rafts[i+1:]...)\n}\n\nfunc (r *RaftCluster) GetIndex(id string) int {\n\ti := 0\n\tfor _, r := range r.rafts {\n\t\tif r.GetLocalID() == id {\n\t\t\treturn i\n\t\t}\n\t\ti++\n\t}\n\treturn -1\n}\n\nfunc InitUIT(t *testing.T, id string) RaftNode {\n\treturn InitUITWithStore(t, id, nil, func(config *raft.Config) {})\n}\n\nfunc InitUITWithStore(t *testing.T, id string, store *raftprevious.InmemStore, cfgMod func(config *raft.Config)) RaftNode {\n\tnode := RaftUIT{}\n\tnode.Config = raft.DefaultConfig()\n\tcfgMod(node.Config)\n\tnode.Config.HeartbeatTimeout = 50 * time.Millisecond\n\tnode.Config.ElectionTimeout = 50 * 
time.Millisecond\n\tnode.Config.LeaderLeaseTimeout = 50 * time.Millisecond\n\tnode.Config.CommitTimeout = 5 * time.Millisecond\n\tnode.id = raft.ServerID(id)\n\tnode.Config.LocalID = node.id\n\tif store != nil {\n\t\tnode.Store = convertInMemStoreToUIT(store)\n\t} else {\n\t\tnode.Store = raft.NewInmemStore()\n\t}\n\n\tnode.Snap = raft.NewInmemSnapshotStore()\n\tnode.fsm = &raft.MockFSM{}\n\tvar err error\n\tnode.trans, err = raft.NewTCPTransport(\"localhost:0\", nil, 2, time.Second, nil)\n\trequire.NoError(t, err)\n\tnode.raft, err = raft.NewRaft(node.Config, node.fsm, node.Store,\n\t\tnode.Store, node.Snap, node.trans)\n\trequire.NoError(t, err)\n\treturn node\n}\n\nfunc InitPrevious(t *testing.T, id string) RaftNode {\n\treturn InitPreviousWithStore(t, id, nil, func(config *raftprevious.Config) {\n\t})\n}\n\nfunc InitPreviousWithStore(t *testing.T, id string, store *raft.InmemStore, f func(config *raftprevious.Config)) RaftNode {\n\tnode := RaftLatest{}\n\tnode.Config = raftprevious.DefaultConfig()\n\tnode.Config.HeartbeatTimeout = 50 * time.Millisecond\n\tnode.Config.ElectionTimeout = 50 * time.Millisecond\n\tnode.Config.LeaderLeaseTimeout = 50 * time.Millisecond\n\tnode.Config.CommitTimeout = 5 * time.Millisecond\n\tnode.id = raftprevious.ServerID(id)\n\tnode.Config.LocalID = node.id\n\tf(node.Config)\n\n\tif store != nil {\n\t\tnode.Store = convertInMemStoreToPrevious(store)\n\t} else {\n\t\tnode.Store = raftprevious.NewInmemStore()\n\t}\n\tnode.Snap = raftprevious.NewInmemSnapshotStore()\n\tnode.fsm = &raftprevious.MockFSM{}\n\tvar err error\n\tnode.trans, err = raftprevious.NewTCPTransport(\"localhost:0\", nil, 2, time.Second, nil)\n\trequire.NoError(t, err)\n\tnode.raft, err = raftprevious.NewRaft(node.Config, node.fsm, node.Store,\n\t\tnode.Store, node.Snap, node.trans)\n\trequire.NoError(t, err)\n\treturn node\n}\n\nfunc convertLogToUIT(ll *raftprevious.Log) *raft.Log {\n\tl := new(raft.Log)\n\tl.Index = ll.Index\n\tl.AppendedAt = ll.AppendedAt\n\tl.Type 
= raft.LogType(ll.Type)\n\tl.Term = ll.Term\n\tl.Data = ll.Data\n\tl.Extensions = ll.Extensions\n\treturn l\n}\nfunc convertLogToPrevious(ll *raft.Log) *raftprevious.Log {\n\tl := new(raftprevious.Log)\n\tl.Index = ll.Index\n\tl.AppendedAt = ll.AppendedAt\n\tl.Type = raftprevious.LogType(ll.Type)\n\tl.Term = ll.Term\n\tl.Data = ll.Data\n\tl.Extensions = ll.Extensions\n\treturn l\n}\n\nvar (\n\tkeyCurrentTerm  = []byte(\"CurrentTerm\")\n\tkeyLastVoteTerm = []byte(\"LastVoteTerm\")\n\tkeyLastVoteCand = []byte(\"LastVoteCand\")\n)\n\nfunc convertInMemStoreToPrevious(s *raft.InmemStore) *raftprevious.InmemStore {\n\tss := raftprevious.NewInmemStore()\n\tfi, _ := s.FirstIndex()\n\tli, _ := s.LastIndex()\n\tfor i := fi; i <= li; i++ {\n\t\tlog := new(raft.Log)\n\t\ts.GetLog(i, log)\n\t\tss.StoreLog(convertLogToPrevious(log))\n\t}\n\n\tget, _ := ss.Get(keyCurrentTerm)\n\tss.Set(keyCurrentTerm, get)\n\n\tget, _ = ss.Get(keyLastVoteTerm)\n\tss.Set(keyLastVoteTerm, get)\n\n\tget, _ = ss.Get(keyLastVoteCand)\n\tss.Set(keyLastVoteCand, get)\n\n\tget64, _ := ss.GetUint64(keyCurrentTerm)\n\tss.SetUint64(keyCurrentTerm, get64)\n\n\tget64, _ = ss.GetUint64(keyLastVoteTerm)\n\tss.SetUint64(keyLastVoteTerm, get64)\n\n\tget64, _ = ss.GetUint64(keyLastVoteCand)\n\tss.SetUint64(keyLastVoteCand, get64)\n\n\treturn ss\n}\n\nfunc convertInMemStoreToUIT(s *raftprevious.InmemStore) *raft.InmemStore {\n\tss := raft.NewInmemStore()\n\tfi, _ := s.FirstIndex()\n\tli, _ := s.LastIndex()\n\tfor i := fi; i <= li; i++ {\n\t\tlog := new(raftprevious.Log)\n\t\ts.GetLog(i, log)\n\t\tss.StoreLog(convertLogToUIT(log))\n\t}\n\n\tget, _ := ss.Get(keyCurrentTerm)\n\tss.Set(keyCurrentTerm, get)\n\n\tget, _ = ss.Get(keyLastVoteTerm)\n\tss.Set(keyLastVoteTerm, get)\n\n\tget, _ = ss.Get(keyLastVoteCand)\n\tss.Set(keyLastVoteCand, get)\n\n\tget64, _ := ss.GetUint64(keyCurrentTerm)\n\tss.SetUint64(keyCurrentTerm, get64)\n\n\tget64, _ = ss.GetUint64(keyLastVoteTerm)\n\tss.SetUint64(keyLastVoteTerm, 
get64)\n\n\tget64, _ = ss.GetUint64(keyLastVoteCand)\n\tss.SetUint64(keyLastVoteCand, get64)\n\n\treturn ss\n}\n"
  },
  {
    "path": "raft-compat/utils/test_utils.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage utils\n\nimport (\n\t\"fmt\"\n\t\"github.com/hashicorp/raft\"\n\traftprevious \"github.com/hashicorp/raft-previous-version\"\n\t\"github.com/hashicorp/raft/compat/testcluster\"\n\t\"github.com/stretchr/testify/require\"\n\t\"testing\"\n\t\"time\"\n)\n\nfunc WaitForNewLeader(t *testing.T, oldLeader string, c testcluster.RaftCluster) {\n\n\tleader := func() string {\n\t\tfor i := 0; i < c.Len(); i++ {\n\t\t\tswitch r := c.Raft(c.ID(i)).(type) {\n\t\t\tcase *raft.Raft:\n\t\t\t\tif r.State() == raft.Leader {\n\t\t\t\t\treturn c.ID(i)\n\t\t\t\t}\n\t\t\tcase *raftprevious.Raft:\n\t\t\t\tif r.State() == raftprevious.Leader {\n\t\t\t\t\treturn c.ID(i)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\treturn \"\"\n\t}\n\tafter := time.After(5 * time.Second)\n\tticker := time.NewTicker(100 * time.Millisecond)\n\tfor {\n\t\tselect {\n\t\tcase <-after:\n\t\t\tt.Fatalf(\"timedout\")\n\t\tcase <-ticker.C:\n\t\t\tid := leader()\n\t\t\tif id != \"\" {\n\t\t\t\tif id != oldLeader || oldLeader == \"\" {\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n}\n\ntype future interface {\n\tError() error\n}\n\nfunc WaitFuture(t *testing.T, f future) {\n\ttimer := time.AfterFunc(1000*time.Millisecond, func() {\n\t\tpanic(fmt.Errorf(\"timeout waiting for future %v\", f))\n\t})\n\tdefer timer.Stop()\n\trequire.NoError(t, f.Error())\n}\n"
  },
  {
    "path": "raft.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"container/list\"\n\t\"errors\"\n\t\"fmt\"\n\t\"io\"\n\t\"strings\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\n\t\"github.com/hashicorp/go-metrics/compat\"\n)\n\nconst (\n\tminCheckInterval          = 10 * time.Millisecond\n\toldestLogGaugeInterval    = 10 * time.Second\n\trpcUnexpectedCommandError = \"unexpected command\"\n)\n\nvar (\n\tkeyCurrentTerm  = []byte(\"CurrentTerm\")\n\tkeyLastVoteTerm = []byte(\"LastVoteTerm\")\n\tkeyLastVoteCand = []byte(\"LastVoteCand\")\n)\n\n// getRPCHeader returns an initialized RPCHeader struct for the given\n// Raft instance. This structure is sent along with RPC requests and\n// responses.\nfunc (r *Raft) getRPCHeader() RPCHeader {\n\treturn RPCHeader{\n\t\tProtocolVersion: r.config().ProtocolVersion,\n\t\tID:              []byte(r.config().LocalID),\n\t\tAddr:            r.trans.EncodePeer(r.config().LocalID, r.localAddr),\n\t}\n}\n\n// checkRPCHeader houses logic about whether this instance of Raft can process\n// the given RPC message.\nfunc (r *Raft) checkRPCHeader(rpc RPC) error {\n\t// Get the header off the RPC message.\n\twh, ok := rpc.Command.(WithRPCHeader)\n\tif !ok {\n\t\treturn fmt.Errorf(\"RPC does not have a header\")\n\t}\n\theader := wh.GetRPCHeader()\n\n\t// First check is to just make sure the code can understand the\n\t// protocol at all.\n\tif header.ProtocolVersion < ProtocolVersionMin ||\n\t\theader.ProtocolVersion > ProtocolVersionMax {\n\t\treturn ErrUnsupportedProtocol\n\t}\n\n\t// Second check is whether we should support this message, given the\n\t// current protocol we are configured to run. This will drop support\n\t// for protocol version 0 starting at protocol version 2, which is\n\t// currently what we want, and in general support one version back. 
We\n\t// may need to revisit this policy depending on how future protocol\n\t// changes evolve.\n\tif header.ProtocolVersion < r.config().ProtocolVersion-1 {\n\t\treturn ErrUnsupportedProtocol\n\t}\n\n\treturn nil\n}\n\n// getSnapshotVersion returns the snapshot version that should be used when\n// creating snapshots, given the protocol version in use.\nfunc getSnapshotVersion(protocolVersion ProtocolVersion) SnapshotVersion {\n\t// Right now we only have two versions and they are backwards compatible\n\t// so we don't need to look at the protocol version.\n\treturn 1\n}\n\n// commitTuple is used to send an index that was committed,\n// with an optional associated future that should be invoked.\ntype commitTuple struct {\n\tlog    *Log\n\tfuture *logFuture\n}\n\n// leaderState is state that is used while we are a leader.\ntype leaderState struct {\n\tleadershipTransferInProgress int32 // indicates that a leadership transfer is in progress.\n\tcommitCh                     chan struct{}\n\tcommitment                   *commitment\n\tinflight                     *list.List // list of logFuture in log index order\n\treplState                    map[ServerID]*followerReplication\n\tnotify                       map[*verifyFuture]struct{}\n\tstepDown                     chan struct{}\n}\n\n// setLeader is used to modify the current leader Address and ID of the cluster\nfunc (r *Raft) setLeader(leaderAddr ServerAddress, leaderID ServerID) {\n\tr.leaderLock.Lock()\n\toldLeaderAddr := r.leaderAddr\n\tr.leaderAddr = leaderAddr\n\toldLeaderID := r.leaderID\n\tr.leaderID = leaderID\n\tr.leaderLock.Unlock()\n\tif oldLeaderAddr != leaderAddr || oldLeaderID != leaderID {\n\t\tr.observe(LeaderObservation{Leader: leaderAddr, LeaderAddr: leaderAddr, LeaderID: leaderID})\n\t}\n}\n\n// requestConfigChange is a helper for the above functions that make\n// configuration change requests. 'req' describes the change. 
For timeout,\n// see AddVoter.\nfunc (r *Raft) requestConfigChange(req configurationChangeRequest, timeout time.Duration) IndexFuture {\n\tvar timer <-chan time.Time\n\tif timeout > 0 {\n\t\ttimer = time.After(timeout)\n\t}\n\tfuture := &configurationChangeFuture{\n\t\treq: req,\n\t}\n\tfuture.init()\n\tselect {\n\tcase <-timer:\n\t\treturn errorFuture{ErrEnqueueTimeout}\n\tcase r.configurationChangeCh <- future:\n\t\treturn future\n\tcase <-r.shutdownCh:\n\t\treturn errorFuture{ErrRaftShutdown}\n\t}\n}\n\n// run the main thread that handles leadership and RPC requests.\nfunc (r *Raft) run() {\n\tfor {\n\t\t// Check if we are doing a shutdown\n\t\tselect {\n\t\tcase <-r.shutdownCh:\n\t\t\t// Clear the leader to prevent forwarding\n\t\t\tr.setLeader(\"\", \"\")\n\t\t\treturn\n\t\tdefault:\n\t\t}\n\n\t\tswitch r.getState() {\n\t\tcase Follower:\n\t\t\tr.runFollower()\n\t\tcase Candidate:\n\t\t\tr.runCandidate()\n\t\tcase Leader:\n\t\t\tr.runLeader()\n\t\t}\n\t}\n}\n\n// runFollower runs the main loop while in the follower state.\nfunc (r *Raft) runFollower() {\n\tdidWarn := false\n\tleaderAddr, leaderID := r.LeaderWithID()\n\tr.logger.Info(\"entering follower state\", \"follower\", r, \"leader-address\", leaderAddr, \"leader-id\", leaderID)\n\tmetrics.IncrCounter([]string{\"raft\", \"state\", \"follower\"}, 1)\n\theartbeatTimer := randomTimeout(r.config().HeartbeatTimeout)\n\n\tfor r.getState() == Follower {\n\t\tr.mainThreadSaturation.sleeping()\n\n\t\tselect {\n\t\tcase rpc := <-r.rpcCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tr.processRPC(rpc)\n\n\t\tcase c := <-r.configurationChangeCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any operations since we are not the leader\n\t\t\tc.respond(ErrNotLeader)\n\n\t\tcase a := <-r.applyCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any operations since we are not the leader\n\t\t\ta.respond(ErrNotLeader)\n\n\t\tcase v := <-r.verifyCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// 
Reject any operations since we are not the leader\n\t\t\tv.respond(ErrNotLeader)\n\n\t\tcase ur := <-r.userRestoreCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any restores since we are not the leader\n\t\t\tur.respond(ErrNotLeader)\n\n\t\tcase l := <-r.leadershipTransferCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any operations since we are not the leader\n\t\t\tl.respond(ErrNotLeader)\n\n\t\tcase c := <-r.configurationsCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tc.configurations = r.configurations.Clone()\n\t\t\tc.respond(nil)\n\n\t\tcase b := <-r.bootstrapCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tb.respond(r.liveBootstrap(b.configuration))\n\n\t\tcase <-r.leaderNotifyCh:\n\t\t\t//  Ignore since we are not the leader\n\n\t\tcase <-r.followerNotifyCh:\n\t\t\theartbeatTimer = time.After(0)\n\n\t\tcase <-heartbeatTimer:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Restart the heartbeat timer\n\t\t\thbTimeout := r.config().HeartbeatTimeout\n\t\t\theartbeatTimer = randomTimeout(hbTimeout)\n\n\t\t\t// Check if we have had a successful contact\n\t\t\tlastContact := r.LastContact()\n\t\t\tif time.Since(lastContact) < hbTimeout {\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\t// Heartbeat failed! 
Transition to the candidate state\n\t\t\tlastLeaderAddr, lastLeaderID := r.LeaderWithID()\n\t\t\tr.setLeader(\"\", \"\")\n\n\t\t\tif r.configurations.latestIndex == 0 {\n\t\t\t\tif !didWarn {\n\t\t\t\t\tr.logger.Warn(\"no known peers, aborting election\")\n\t\t\t\t\tdidWarn = true\n\t\t\t\t}\n\t\t\t} else if r.configurations.latestIndex == r.configurations.committedIndex &&\n\t\t\t\t!hasVote(r.configurations.latest, r.localID) {\n\t\t\t\tif !didWarn {\n\t\t\t\t\tr.logger.Warn(\"not part of stable configuration, aborting election\")\n\t\t\t\t\tdidWarn = true\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tmetrics.IncrCounter([]string{\"raft\", \"transition\", \"heartbeat_timeout\"}, 1)\n\t\t\t\tif hasVote(r.configurations.latest, r.localID) {\n\t\t\t\t\tr.logger.Warn(\"heartbeat timeout reached, starting election\", \"last-leader-addr\", lastLeaderAddr, \"last-leader-id\", lastLeaderID)\n\t\t\t\t\tr.setState(Candidate)\n\t\t\t\t\treturn\n\t\t\t\t} else if !didWarn {\n\t\t\t\t\tr.logger.Warn(\"heartbeat timeout reached, not part of a stable configuration or a non-voter, not triggering a leader election\")\n\t\t\t\t\tdidWarn = true\n\t\t\t\t}\n\t\t\t}\n\n\t\tcase <-r.shutdownCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// liveBootstrap attempts to seed an initial configuration for the cluster. See\n// the Raft object's member BootstrapCluster for more details. 
This must only be\n// called on the main thread, and only makes sense in the follower state.\nfunc (r *Raft) liveBootstrap(configuration Configuration) error {\n\tif !hasVote(configuration, r.localID) {\n\t\t// Reject this operation since we are not a voter\n\t\treturn ErrNotVoter\n\t}\n\n\t// Use the pre-init API to make the static updates.\n\tcfg := r.config()\n\terr := BootstrapCluster(&cfg, r.logs, r.stable, r.snapshots, r.trans, configuration)\n\tif err != nil {\n\t\treturn err\n\t}\n\n\t// Make the configuration live.\n\tvar entry Log\n\tif err := r.logs.GetLog(1, &entry); err != nil {\n\t\tpanic(err)\n\t}\n\tr.setCurrentTerm(1)\n\tr.setLastLog(entry.Index, entry.Term)\n\treturn r.processConfigurationLogEntry(&entry)\n}\n\n// runCandidate runs the main loop while in the candidate state.\nfunc (r *Raft) runCandidate() {\n\tterm := r.getCurrentTerm() + 1\n\tr.logger.Info(\"entering candidate state\", \"node\", r, \"term\", term)\n\tmetrics.IncrCounter([]string{\"raft\", \"state\", \"candidate\"}, 1)\n\n\t// Start vote for us, and set a timeout\n\tvar voteCh <-chan *voteResult\n\tvar prevoteCh <-chan *preVoteResult\n\n\t// check if pre-vote is active and that this is not a leader transfer.\n\t// Leader transfer do not perform prevote by design\n\tif !r.preVoteDisabled && !r.candidateFromLeadershipTransfer.Load() {\n\t\tprevoteCh = r.preElectSelf()\n\t} else {\n\t\tvoteCh = r.electSelf()\n\t}\n\n\t// Make sure the leadership transfer flag is reset after each run. 
Having this\n\t// flag will set the field LeadershipTransfer in a RequestVoteRequest to true,\n\t// which will make other servers vote even though they have a leader already.\n\t// It is important to reset that flag, because this privilege could be abused\n\t// otherwise.\n\tdefer func() { r.candidateFromLeadershipTransfer.Store(false) }()\n\n\telectionTimeout := r.config().ElectionTimeout\n\telectionTimer := randomTimeout(electionTimeout)\n\n\t// Tally the votes, need a simple majority\n\tpreVoteGrantedVotes := 0\n\tpreVoteRefusedVotes := 0\n\tgrantedVotes := 0\n\tvotesNeeded := r.quorumSize()\n\tr.logger.Debug(\"calculated votes needed\", \"needed\", votesNeeded, \"term\", term)\n\n\tfor r.getState() == Candidate {\n\t\tr.mainThreadSaturation.sleeping()\n\n\t\tselect {\n\t\tcase rpc := <-r.rpcCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tr.processRPC(rpc)\n\t\tcase preVote := <-prevoteCh:\n\t\t\t// This a pre-vote case it should trigger a \"real\" election if the pre-vote is won.\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tr.logger.Debug(\"pre-vote received\", \"from\", preVote.voterID, \"term\", preVote.Term, \"tally\", preVoteGrantedVotes)\n\t\t\t// Check if the term is greater than ours, bail\n\t\t\tif preVote.Term > term {\n\t\t\t\tr.logger.Debug(\"pre-vote denied: found newer term, falling back to follower\", \"term\", preVote.Term)\n\t\t\t\tr.setState(Follower)\n\t\t\t\tr.setCurrentTerm(preVote.Term)\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\t// Check if the preVote is granted\n\t\t\tif preVote.Granted {\n\t\t\t\tpreVoteGrantedVotes++\n\t\t\t\tr.logger.Debug(\"pre-vote granted\", \"from\", preVote.voterID, \"term\", preVote.Term, \"tally\", preVoteGrantedVotes)\n\t\t\t} else {\n\t\t\t\tpreVoteRefusedVotes++\n\t\t\t\tr.logger.Debug(\"pre-vote denied\", \"from\", preVote.voterID, \"term\", preVote.Term, \"tally\", preVoteGrantedVotes)\n\t\t\t}\n\n\t\t\t// Check if we've won the pre-vote and proceed to election if so\n\t\t\tif preVoteGrantedVotes >= votesNeeded 
{\n\t\t\t\tr.logger.Info(\"pre-vote successful, starting election\", \"term\", preVote.Term,\n\t\t\t\t\t\"tally\", preVoteGrantedVotes, \"refused\", preVoteRefusedVotes, \"votesNeeded\", votesNeeded)\n\t\t\t\tpreVoteGrantedVotes = 0\n\t\t\t\tpreVoteRefusedVotes = 0\n\t\t\t\telectionTimer = randomTimeout(electionTimeout)\n\t\t\t\tprevoteCh = nil\n\t\t\t\tvoteCh = r.electSelf()\n\t\t\t}\n\t\t\t// Check if we've lost the pre-vote and wait for the election to timeout so we can do another time of\n\t\t\t// prevote.\n\t\t\tif preVoteRefusedVotes >= votesNeeded {\n\t\t\t\tr.logger.Info(\"pre-vote campaign failed, waiting for election timeout\", \"term\", preVote.Term,\n\t\t\t\t\t\"tally\", preVoteGrantedVotes, \"refused\", preVoteRefusedVotes, \"votesNeeded\", votesNeeded)\n\t\t\t}\n\t\tcase vote := <-voteCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Check if the term is greater than ours, bail\n\t\t\tif vote.Term > r.getCurrentTerm() {\n\t\t\t\tr.logger.Debug(\"newer term discovered, fallback to follower\", \"term\", vote.Term)\n\t\t\t\tr.setState(Follower)\n\t\t\t\tr.setCurrentTerm(vote.Term)\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\t// Check if the vote is granted\n\t\t\tif vote.Granted {\n\t\t\t\tgrantedVotes++\n\t\t\t\tr.logger.Debug(\"vote granted\", \"from\", vote.voterID, \"term\", vote.Term, \"tally\", grantedVotes)\n\t\t\t}\n\n\t\t\t// Check if we've become the leader\n\t\t\tif grantedVotes >= votesNeeded {\n\t\t\t\tr.logger.Info(\"election won\", \"term\", vote.Term, \"tally\", grantedVotes)\n\t\t\t\tr.setState(Leader)\n\t\t\t\tr.setLeader(r.localAddr, r.localID)\n\t\t\t\treturn\n\t\t\t}\n\t\tcase c := <-r.configurationChangeCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any operations since we are not the leader\n\t\t\tc.respond(ErrNotLeader)\n\n\t\tcase a := <-r.applyCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any operations since we are not the leader\n\t\t\ta.respond(ErrNotLeader)\n\n\t\tcase v := 
<-r.verifyCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any operations since we are not the leader\n\t\t\tv.respond(ErrNotLeader)\n\n\t\tcase ur := <-r.userRestoreCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any restores since we are not the leader\n\t\t\tur.respond(ErrNotLeader)\n\n\t\tcase l := <-r.leadershipTransferCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Reject any operations since we are not the leader\n\t\t\tl.respond(ErrNotLeader)\n\n\t\tcase c := <-r.configurationsCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tc.configurations = r.configurations.Clone()\n\t\t\tc.respond(nil)\n\n\t\tcase b := <-r.bootstrapCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tb.respond(ErrCantBootstrap)\n\n\t\tcase <-r.leaderNotifyCh:\n\t\t\t//  Ignore since we are not the leader\n\n\t\tcase <-r.followerNotifyCh:\n\t\t\tif electionTimeout != r.config().ElectionTimeout {\n\t\t\t\telectionTimeout = r.config().ElectionTimeout\n\t\t\t\telectionTimer = randomTimeout(electionTimeout)\n\t\t\t}\n\n\t\tcase <-electionTimer:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Election failed! Restart the election. 
We simply return,\n\t\t\t// which will kick us back into runCandidate\n\t\t\tr.logger.Warn(\"Election timeout reached, restarting election\")\n\t\t\treturn\n\n\t\tcase <-r.shutdownCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\nfunc (r *Raft) setLeadershipTransferInProgress(v bool) {\n\tif v {\n\t\tatomic.StoreInt32(&r.leaderState.leadershipTransferInProgress, 1)\n\t} else {\n\t\tatomic.StoreInt32(&r.leaderState.leadershipTransferInProgress, 0)\n\t}\n}\n\nfunc (r *Raft) getLeadershipTransferInProgress() bool {\n\tv := atomic.LoadInt32(&r.leaderState.leadershipTransferInProgress)\n\treturn v == 1\n}\n\nfunc (r *Raft) setupLeaderState() {\n\tr.leaderState.commitCh = make(chan struct{}, 1)\n\tr.leaderState.commitment = newCommitment(r.leaderState.commitCh,\n\t\tr.configurations.latest,\n\t\tr.getLastIndex()+1 /* first index that may be committed in this term */)\n\tr.leaderState.inflight = list.New()\n\tr.leaderState.replState = make(map[ServerID]*followerReplication)\n\tr.leaderState.notify = make(map[*verifyFuture]struct{})\n\tr.leaderState.stepDown = make(chan struct{}, 1)\n}\n\n// runLeader runs the main loop while in leader state. Do the setup here and drop into\n// the leaderLoop for the hot loop.\nfunc (r *Raft) runLeader() {\n\tr.logger.Info(\"entering leader state\", \"leader\", r)\n\tmetrics.IncrCounter([]string{\"raft\", \"state\", \"leader\"}, 1)\n\n\t// Notify that we are the leader\n\toverrideNotifyBool(r.leaderCh, true)\n\n\t// Store the notify chan. It's not reloadable so shouldn't change before the\n\t// defer below runs, but this makes sure we always notify the same chan if\n\t// ever for both gaining and losing leadership.\n\tnotify := r.config().NotifyCh\n\n\t// Push to the notify channel if given\n\tif notify != nil {\n\t\tselect {\n\t\tcase notify <- true:\n\t\tcase <-r.shutdownCh:\n\t\t\t// make sure push to the notify channel ( if given )\n\t\t\tselect {\n\t\t\tcase notify <- true:\n\t\t\tdefault:\n\t\t\t}\n\t\t}\n\t}\n\n\t// setup leader state. 
This is only supposed to be accessed within the\n\t// leaderloop.\n\tr.setupLeaderState()\n\n\t// Run a background go-routine to emit metrics on log age\n\tstopCh := make(chan struct{})\n\tgo emitLogStoreMetrics(r.logs, []string{\"raft\", \"leader\"}, oldestLogGaugeInterval, stopCh)\n\n\t// Cleanup state on step down\n\tdefer func() {\n\t\tclose(stopCh)\n\n\t\t// Since we were the leader previously, we update our\n\t\t// last contact time when we step down, so that we are not\n\t\t// reporting a last contact time from before we were the\n\t\t// leader. Otherwise, to a client it would seem our data\n\t\t// is extremely stale.\n\t\tr.setLastContact()\n\n\t\t// Stop replication\n\t\tfor _, p := range r.leaderState.replState {\n\t\t\tclose(p.stopCh)\n\t\t}\n\n\t\t// Respond to all inflight operations\n\t\tfor e := r.leaderState.inflight.Front(); e != nil; e = e.Next() {\n\t\t\te.Value.(*logFuture).respond(ErrLeadershipLost)\n\t\t}\n\n\t\t// Respond to any pending verify requests\n\t\tfor future := range r.leaderState.notify {\n\t\t\tfuture.respond(ErrLeadershipLost)\n\t\t}\n\n\t\t// Clear all the state\n\t\tr.leaderState.commitCh = nil\n\t\tr.leaderState.commitment = nil\n\t\tr.leaderState.inflight = nil\n\t\tr.leaderState.replState = nil\n\t\tr.leaderState.notify = nil\n\t\tr.leaderState.stepDown = nil\n\n\t\t// If we are stepping down for some reason, no known leader.\n\t\t// We may have stepped down due to an RPC call, which would\n\t\t// provide the leader, so we cannot always blank this out.\n\t\tr.leaderLock.Lock()\n\t\tif r.leaderAddr == r.localAddr && r.leaderID == r.localID {\n\t\t\tr.leaderAddr = \"\"\n\t\t\tr.leaderID = \"\"\n\t\t}\n\t\tr.leaderLock.Unlock()\n\n\t\t// Notify that we are not the leader\n\t\toverrideNotifyBool(r.leaderCh, false)\n\n\t\t// Push to the notify channel if given\n\t\tif notify != nil {\n\t\t\tselect {\n\t\t\tcase notify <- false:\n\t\t\tcase <-r.shutdownCh:\n\t\t\t\t// On shutdown, make a best effort but do not 
block\n\t\t\t\tselect {\n\t\t\t\tcase notify <- false:\n\t\t\t\tdefault:\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}()\n\n\t// Start a replication routine for each peer\n\tr.startStopReplication()\n\n\t// Dispatch a no-op log entry first. This gets this leader up to the latest\n\t// possible commit index, even in the absence of client commands. This used\n\t// to append a configuration entry instead of a noop. However, that permits\n\t// an unbounded number of uncommitted configurations in the log. We now\n\t// maintain that there exists at most one uncommitted configuration entry in\n\t// any log, so we have to do proper no-ops here.\n\tnoop := &logFuture{log: Log{Type: LogNoop}}\n\tr.dispatchLogs([]*logFuture{noop})\n\n\t// Sit in the leader loop until we step down\n\tr.leaderLoop()\n}\n\n// startStopReplication will set up state and start asynchronous replication to\n// new peers, and stop replication to removed peers. Before removing a peer,\n// it'll instruct the replication routines to try to replicate to the current\n// index. 
This must only be called from the main thread.\nfunc (r *Raft) startStopReplication() {\n\tinConfig := make(map[ServerID]bool, len(r.configurations.latest.Servers))\n\tlastIdx := r.getLastIndex()\n\n\t// Start replication goroutines that need starting\n\tfor _, server := range r.configurations.latest.Servers {\n\t\tif server.ID == r.localID {\n\t\t\tcontinue\n\t\t}\n\n\t\tinConfig[server.ID] = true\n\n\t\ts, ok := r.leaderState.replState[server.ID]\n\t\tif !ok {\n\t\t\tr.logger.Info(\"added peer, starting replication\", \"peer\", server.ID)\n\t\t\ts = &followerReplication{\n\t\t\t\tpeer:                server,\n\t\t\t\tcommitment:          r.leaderState.commitment,\n\t\t\t\tstopCh:              make(chan uint64, 1),\n\t\t\t\ttriggerCh:           make(chan struct{}, 1),\n\t\t\t\ttriggerDeferErrorCh: make(chan *deferError, 1),\n\t\t\t\tcurrentTerm:         r.getCurrentTerm(),\n\t\t\t\tnextIndex:           lastIdx + 1,\n\t\t\t\tlastContact:         time.Now(),\n\t\t\t\tnotify:              make(map[*verifyFuture]struct{}),\n\t\t\t\tnotifyCh:            make(chan struct{}, 1),\n\t\t\t\tstepDown:            r.leaderState.stepDown,\n\t\t\t}\n\n\t\t\tr.leaderState.replState[server.ID] = s\n\t\t\tr.goFunc(func() { r.replicate(s) })\n\t\t\tasyncNotifyCh(s.triggerCh)\n\t\t\tr.observe(PeerObservation{Peer: server, Removed: false})\n\t\t} else if ok {\n\n\t\t\ts.peerLock.RLock()\n\t\t\tpeer := s.peer\n\t\t\ts.peerLock.RUnlock()\n\n\t\t\tif peer.Address != server.Address {\n\t\t\t\tr.logger.Info(\"updating peer\", \"peer\", server.ID)\n\t\t\t\ts.peerLock.Lock()\n\t\t\t\ts.peer = server\n\t\t\t\ts.peerLock.Unlock()\n\t\t\t}\n\t\t}\n\t}\n\n\t// Stop replication goroutines that need stopping\n\tfor serverID, repl := range r.leaderState.replState {\n\t\tif inConfig[serverID] {\n\t\t\tcontinue\n\t\t}\n\t\t// Replicate up to lastIdx and stop\n\t\tr.logger.Info(\"removed peer, stopping replication\", \"peer\", serverID, \"last-index\", lastIdx)\n\t\trepl.stopCh <- 
lastIdx\n\t\tclose(repl.stopCh)\n\t\tdelete(r.leaderState.replState, serverID)\n\t\tr.observe(PeerObservation{Peer: repl.peer, Removed: true})\n\t}\n\n\t// Update peers metric\n\tmetrics.SetGauge([]string{\"raft\", \"peers\"}, float32(len(r.configurations.latest.Servers)))\n}\n\n// configurationChangeChIfStable returns r.configurationChangeCh if it's safe\n// to process requests from it, or nil otherwise. This must only be called\n// from the main thread.\n//\n// Note that if the conditions here were to change outside of leaderLoop to take\n// this from nil to non-nil, we would need leaderLoop to be kicked.\nfunc (r *Raft) configurationChangeChIfStable() chan *configurationChangeFuture {\n\t// Have to wait until:\n\t// 1. The latest configuration is committed, and\n\t// 2. This leader has committed some entry (the noop) in this term\n\t//    https://groups.google.com/forum/#!msg/raft-dev/t4xj6dJTP6E/d2D9LrWRza8J\n\tif r.configurations.latestIndex == r.configurations.committedIndex &&\n\t\tr.getCommitIndex() >= r.leaderState.commitment.startIndex {\n\t\treturn r.configurationChangeCh\n\t}\n\treturn nil\n}\n\n// leaderLoop is the hot loop for a leader. It is invoked\n// after all the various leader setup is done.\nfunc (r *Raft) leaderLoop() {\n\t// stepDown is used to track if there is an inflight log that\n\t// would cause us to lose leadership (specifically a RemovePeer of\n\t// ourselves). 
If this is the case, we must not allow any logs to\n\t// be processed in parallel, otherwise we are basing commit on\n\t// only a single peer (ourself) and replicating to an undefined set\n\t// of peers.\n\tstepDown := false\n\t// This is only used for the first lease check, we reload lease below\n\t// based on the current config value.\n\tlease := time.After(r.config().LeaderLeaseTimeout)\n\n\tfor r.getState() == Leader {\n\t\tr.mainThreadSaturation.sleeping()\n\n\t\tselect {\n\t\tcase rpc := <-r.rpcCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tr.processRPC(rpc)\n\n\t\tcase <-r.leaderState.stepDown:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tr.setState(Follower)\n\n\t\tcase future := <-r.leadershipTransferCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tif r.getLeadershipTransferInProgress() {\n\t\t\t\tr.logger.Debug(ErrLeadershipTransferInProgress.Error())\n\t\t\t\tfuture.respond(ErrLeadershipTransferInProgress)\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\tr.logger.Debug(\"starting leadership transfer\", \"id\", future.ID, \"address\", future.Address)\n\n\t\t\t// When we are leaving leaderLoop, we are no longer\n\t\t\t// leader, so we should stop transferring.\n\t\t\tleftLeaderLoop := make(chan struct{})\n\t\t\tdefer func() { close(leftLeaderLoop) }()\n\n\t\t\tstopCh := make(chan struct{})\n\t\t\tdoneCh := make(chan error, 1)\n\n\t\t\t// This is intentionally being setup outside of the\n\t\t\t// leadershipTransfer function. 
Because the TimeoutNow\n\t\t\t// call is blocking and there is no way to abort that\n\t\t\t// in case eg the timer expires.\n\t\t\t// The leadershipTransfer function is controlled with\n\t\t\t// the stopCh and doneCh.\n\t\t\t// No matter how this exits, have this function set\n\t\t\t// leadership transfer to false before we return\n\t\t\t//\n\t\t\t// Note that this leaves a window where callers of\n\t\t\t// LeadershipTransfer() and LeadershipTransferToServer()\n\t\t\t// may start executing after they get their future but before\n\t\t\t// this routine has set leadershipTransferInProgress back to false.\n\t\t\t// It may be safe to modify things such that setLeadershipTransferInProgress\n\t\t\t// is set to false before calling future.Respond, but that still needs\n\t\t\t// to be tested and this situation mirrors what callers already had to deal with.\n\t\t\tgo func() {\n\t\t\t\tdefer r.setLeadershipTransferInProgress(false)\n\t\t\t\tselect {\n\t\t\t\tcase <-time.After(r.config().ElectionTimeout):\n\t\t\t\t\tclose(stopCh)\n\t\t\t\t\terr := fmt.Errorf(\"leadership transfer timeout\")\n\t\t\t\t\tr.logger.Debug(err.Error())\n\t\t\t\t\tfuture.respond(err)\n\t\t\t\t\t<-doneCh\n\t\t\t\tcase <-leftLeaderLoop:\n\t\t\t\t\tclose(stopCh)\n\t\t\t\t\terr := fmt.Errorf(\"lost leadership during transfer (expected)\")\n\t\t\t\t\tr.logger.Debug(err.Error())\n\t\t\t\t\tfuture.respond(nil)\n\t\t\t\t\t<-doneCh\n\t\t\t\tcase err := <-doneCh:\n\t\t\t\t\tif err != nil {\n\t\t\t\t\t\tr.logger.Debug(err.Error())\n\t\t\t\t\t\tfuture.respond(err)\n\t\t\t\t\t} else {\n\t\t\t\t\t\t// Wait for up to ElectionTimeout before flagging the\n\t\t\t\t\t\t// leadership transfer as done and unblocking applies in\n\t\t\t\t\t\t// the leaderLoop.\n\t\t\t\t\t\tselect {\n\t\t\t\t\t\tcase <-time.After(r.config().ElectionTimeout):\n\t\t\t\t\t\t\terr := fmt.Errorf(\"leadership transfer timeout\")\n\t\t\t\t\t\t\tr.logger.Debug(err.Error())\n\t\t\t\t\t\t\tfuture.respond(err)\n\t\t\t\t\t\tcase 
<-leftLeaderLoop:\n\t\t\t\t\t\t\tr.logger.Debug(\"lost leadership during transfer (expected)\")\n\t\t\t\t\t\t\tfuture.respond(nil)\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}()\n\n\t\t\t// leaderState.replState is accessed here before\n\t\t\t// starting leadership transfer asynchronously because\n\t\t\t// leaderState is only supposed to be accessed in the\n\t\t\t// leaderloop.\n\t\t\tid := future.ID\n\t\t\taddress := future.Address\n\t\t\tif id == nil {\n\t\t\t\ts := r.pickServer()\n\t\t\t\tif s != nil {\n\t\t\t\t\tid = &s.ID\n\t\t\t\t\taddress = &s.Address\n\t\t\t\t} else {\n\t\t\t\t\tdoneCh <- fmt.Errorf(\"cannot find peer\")\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\t\t\t}\n\t\t\tstate, ok := r.leaderState.replState[*id]\n\t\t\tif !ok {\n\t\t\t\tdoneCh <- fmt.Errorf(\"cannot find replication state for %v\", id)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tr.setLeadershipTransferInProgress(true)\n\t\t\tgo r.leadershipTransfer(*id, *address, state, stopCh, doneCh)\n\n\t\tcase <-r.leaderState.commitCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Process the newly committed entries\n\t\t\toldCommitIndex := r.getCommitIndex()\n\t\t\tcommitIndex := r.leaderState.commitment.getCommitIndex()\n\t\t\tr.setCommitIndex(commitIndex)\n\n\t\t\t// New configuration has been committed, set it as the committed\n\t\t\t// value.\n\t\t\tif r.configurations.latestIndex > oldCommitIndex &&\n\t\t\t\tr.configurations.latestIndex <= commitIndex {\n\t\t\t\tr.setCommittedConfiguration(r.configurations.latest, r.configurations.latestIndex)\n\t\t\t\tif !hasVote(r.configurations.committed, r.localID) {\n\t\t\t\t\tstepDown = true\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tstart := time.Now()\n\t\t\tvar groupReady []*list.Element\n\t\t\tgroupFutures := make(map[uint64]*logFuture)\n\t\t\tvar lastIdxInGroup uint64\n\n\t\t\t// Pull all inflight logs that are committed off the queue.\n\t\t\tfor e := r.leaderState.inflight.Front(); e != nil; e = e.Next() {\n\t\t\t\tcommitLog := e.Value.(*logFuture)\n\t\t\t\tidx := 
commitLog.log.Index\n\t\t\t\tif idx > commitIndex {\n\t\t\t\t\t// Don't go past the committed index\n\t\t\t\t\tbreak\n\t\t\t\t}\n\n\t\t\t\t// Measure the commit time\n\t\t\t\tmetrics.MeasureSince([]string{\"raft\", \"commitTime\"}, commitLog.dispatch)\n\t\t\t\tgroupReady = append(groupReady, e)\n\t\t\t\tgroupFutures[idx] = commitLog\n\t\t\t\tlastIdxInGroup = idx\n\t\t\t}\n\n\t\t\t// Process the group\n\t\t\tif len(groupReady) != 0 {\n\t\t\t\tr.processLogs(lastIdxInGroup, groupFutures)\n\n\t\t\t\tfor _, e := range groupReady {\n\t\t\t\t\tr.leaderState.inflight.Remove(e)\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Measure the time to enqueue batch of logs for FSM to apply\n\t\t\tmetrics.MeasureSince([]string{\"raft\", \"fsm\", \"enqueue\"}, start)\n\n\t\t\t// Count the number of logs enqueued\n\t\t\tmetrics.SetGauge([]string{\"raft\", \"commitNumLogs\"}, float32(len(groupReady)))\n\n\t\t\tif stepDown {\n\t\t\t\tif r.config().ShutdownOnRemove {\n\t\t\t\t\tr.logger.Info(\"removed ourself, shutting down\")\n\t\t\t\t\tr.Shutdown()\n\t\t\t\t} else {\n\t\t\t\t\tr.logger.Info(\"removed ourself, transitioning to follower\")\n\t\t\t\t\tr.setState(Follower)\n\t\t\t\t}\n\t\t\t}\n\n\t\tcase v := <-r.verifyCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tif v.quorumSize == 0 {\n\t\t\t\t// Just dispatched, start the verification\n\t\t\t\tr.verifyLeader(v)\n\t\t\t} else if v.votes < v.quorumSize {\n\t\t\t\t// Early return, means there must be a new leader\n\t\t\t\tr.logger.Warn(\"new leader elected, stepping down\")\n\t\t\t\tr.setState(Follower)\n\t\t\t\tdelete(r.leaderState.notify, v)\n\t\t\t\tfor _, repl := range r.leaderState.replState {\n\t\t\t\t\trepl.cleanNotify(v)\n\t\t\t\t}\n\t\t\t\tv.respond(ErrNotLeader)\n\n\t\t\t} else {\n\t\t\t\t// Quorum of members agree, we are still leader\n\t\t\t\tdelete(r.leaderState.notify, v)\n\t\t\t\tfor _, repl := range r.leaderState.replState {\n\t\t\t\t\trepl.cleanNotify(v)\n\t\t\t\t}\n\t\t\t\tv.respond(nil)\n\t\t\t}\n\n\t\tcase future := 
<-r.userRestoreCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tif r.getLeadershipTransferInProgress() {\n\t\t\t\tr.logger.Debug(ErrLeadershipTransferInProgress.Error())\n\t\t\t\tfuture.respond(ErrLeadershipTransferInProgress)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\terr := r.restoreUserSnapshot(future.meta, future.reader)\n\t\t\tfuture.respond(err)\n\n\t\tcase future := <-r.configurationsCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tif r.getLeadershipTransferInProgress() {\n\t\t\t\tr.logger.Debug(ErrLeadershipTransferInProgress.Error())\n\t\t\t\tfuture.respond(ErrLeadershipTransferInProgress)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tfuture.configurations = r.configurations.Clone()\n\t\t\tfuture.respond(nil)\n\n\t\tcase future := <-r.configurationChangeChIfStable():\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tif r.getLeadershipTransferInProgress() {\n\t\t\t\tr.logger.Debug(ErrLeadershipTransferInProgress.Error())\n\t\t\t\tfuture.respond(ErrLeadershipTransferInProgress)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tr.appendConfigurationEntry(future)\n\n\t\tcase b := <-r.bootstrapCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tb.respond(ErrCantBootstrap)\n\n\t\tcase newLog := <-r.applyCh:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\tif r.getLeadershipTransferInProgress() {\n\t\t\t\tr.logger.Debug(ErrLeadershipTransferInProgress.Error())\n\t\t\t\tnewLog.respond(ErrLeadershipTransferInProgress)\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\t// Group commit, gather all the ready commits\n\t\t\tready := []*logFuture{newLog}\n\t\tGROUP_COMMIT_LOOP:\n\t\t\tfor i := 0; i < r.config().MaxAppendEntries; i++ {\n\t\t\t\tselect {\n\t\t\t\tcase newLog := <-r.applyCh:\n\t\t\t\t\tready = append(ready, newLog)\n\t\t\t\tdefault:\n\t\t\t\t\tbreak GROUP_COMMIT_LOOP\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Dispatch the logs\n\t\t\tif stepDown {\n\t\t\t\t// we're in the process of stepping down as leader, don't process anything new\n\t\t\t\tfor i := range ready 
{\n\t\t\t\t\tready[i].respond(ErrNotLeader)\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tr.dispatchLogs(ready)\n\t\t\t}\n\n\t\tcase <-lease:\n\t\t\tr.mainThreadSaturation.working()\n\t\t\t// Check if we've exceeded the lease, potentially stepping down\n\t\t\tmaxDiff := r.checkLeaderLease()\n\n\t\t\t// Next check interval should adjust for the last node we've\n\t\t\t// contacted, without going negative\n\t\t\tcheckInterval := r.config().LeaderLeaseTimeout - maxDiff\n\t\t\tif checkInterval < minCheckInterval {\n\t\t\t\tcheckInterval = minCheckInterval\n\t\t\t}\n\n\t\t\t// Renew the lease timer\n\t\t\tlease = time.After(checkInterval)\n\n\t\tcase <-r.leaderNotifyCh:\n\t\t\tfor _, repl := range r.leaderState.replState {\n\t\t\t\tasyncNotifyCh(repl.notifyCh)\n\t\t\t}\n\n\t\tcase <-r.followerNotifyCh:\n\t\t\t//  Ignore since we are not a follower\n\n\t\tcase <-r.shutdownCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// verifyLeader must be called from the main thread for safety.\n// Causes the followers to attempt an immediate heartbeat.\nfunc (r *Raft) verifyLeader(v *verifyFuture) {\n\t// Current leader always votes for self\n\tv.votes = 1\n\n\t// Set the quorum size, hot-path for single node\n\tv.quorumSize = r.quorumSize()\n\tif v.quorumSize == 1 {\n\t\tv.respond(nil)\n\t\treturn\n\t}\n\n\t// Track this request\n\tv.notifyCh = r.verifyCh\n\tr.leaderState.notify[v] = struct{}{}\n\n\t// Trigger immediate heartbeats\n\tfor _, repl := range r.leaderState.replState {\n\t\trepl.notifyLock.Lock()\n\t\trepl.notify[v] = struct{}{}\n\t\trepl.notifyLock.Unlock()\n\t\tasyncNotifyCh(repl.notifyCh)\n\t}\n}\n\n// leadershipTransfer is doing the heavy lifting for the leadership transfer.\nfunc (r *Raft) leadershipTransfer(id ServerID, address ServerAddress, repl *followerReplication, stopCh chan struct{}, doneCh chan error) {\n\t// make sure we are not already stopped\n\tselect {\n\tcase <-stopCh:\n\t\tdoneCh <- nil\n\t\treturn\n\tdefault:\n\t}\n\n\tfor atomic.LoadUint64(&repl.nextIndex) <= 
r.getLastIndex() {\n\t\terr := &deferError{}\n\t\terr.init()\n\t\trepl.triggerDeferErrorCh <- err\n\t\tselect {\n\t\tcase err := <-err.errCh:\n\t\t\tif err != nil {\n\t\t\t\tdoneCh <- err\n\t\t\t\treturn\n\t\t\t}\n\t\tcase <-stopCh:\n\t\t\tdoneCh <- nil\n\t\t\treturn\n\t\t}\n\t}\n\n\t// Step ?: the thesis describes in chap 6.4.1: Using clocks to reduce\n\t// messaging for read-only queries. If this is implemented, the lease\n\t// has to be reset as well, in case leadership is transferred. This\n\t// implementation also has a lease, but it serves another purpose and\n\t// doesn't need to be reset. The lease mechanism in our raft lib, is\n\t// setup in a similar way to the one in the thesis, but in practice\n\t// it's a timer that just tells the leader how often to check\n\t// heartbeats are still coming in.\n\n\t// Step 3: send TimeoutNow message to target server.\n\terr := r.trans.TimeoutNow(id, address, &TimeoutNowRequest{RPCHeader: r.getRPCHeader()}, &TimeoutNowResponse{})\n\tif err != nil {\n\t\terr = fmt.Errorf(\"failed to make TimeoutNow RPC to %v: %v\", id, err)\n\t}\n\tdoneCh <- err\n}\n\n// checkLeaderLease is used to check if we can contact a quorum of nodes\n// within the last leader lease interval. If not, we need to step down,\n// as we may have lost connectivity. Returns the maximum duration without\n// contact. 
This must only be called from the main thread.\nfunc (r *Raft) checkLeaderLease() time.Duration {\n\t// Track contacted nodes, we can always contact ourself\n\tcontacted := 0\n\n\t// Store lease timeout for this one check invocation as we need to refer to it\n\t// in the loop and would be confusing if it ever becomes reloadable and\n\t// changes between iterations below.\n\tleaseTimeout := r.config().LeaderLeaseTimeout\n\n\t// Check each follower\n\tvar maxDiff time.Duration\n\tnow := time.Now()\n\tfor _, server := range r.configurations.latest.Servers {\n\t\tif server.Suffrage == Voter {\n\t\t\tif server.ID == r.localID {\n\t\t\t\tcontacted++\n\t\t\t\tcontinue\n\t\t\t}\n\t\t\tf := r.leaderState.replState[server.ID]\n\t\t\tdiff := now.Sub(f.LastContact())\n\t\t\tif diff <= leaseTimeout {\n\t\t\t\tcontacted++\n\t\t\t\tif diff > maxDiff {\n\t\t\t\t\tmaxDiff = diff\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\t// Log at least once at high value, then debug. Otherwise it gets very verbose.\n\t\t\t\tif diff <= 3*leaseTimeout {\n\t\t\t\t\tr.logger.Warn(\"failed to contact\", \"server-id\", server.ID, \"time\", diff)\n\t\t\t\t} else {\n\t\t\t\t\tr.logger.Debug(\"failed to contact\", \"server-id\", server.ID, \"time\", diff)\n\t\t\t\t}\n\t\t\t}\n\t\t\tmetrics.AddSample([]string{\"raft\", \"leader\", \"lastContact\"}, float32(diff/time.Millisecond))\n\t\t}\n\t}\n\n\t// Verify we can contact a quorum\n\tquorum := r.quorumSize()\n\tif contacted < quorum {\n\t\tr.logger.Warn(\"failed to contact quorum of nodes, stepping down\")\n\t\tr.setState(Follower)\n\t\tmetrics.IncrCounter([]string{\"raft\", \"transition\", \"leader_lease_timeout\"}, 1)\n\t}\n\treturn maxDiff\n}\n\n// quorumSize is used to return the quorum size. 
This must only be called on\n// the main thread.\n// TODO: revisit usage\nfunc (r *Raft) quorumSize() int {\n\tvoters := 0\n\tfor _, server := range r.configurations.latest.Servers {\n\t\tif server.Suffrage == Voter {\n\t\t\tvoters++\n\t\t}\n\t}\n\treturn voters/2 + 1\n}\n\n// restoreUserSnapshot is used to manually consume an external snapshot, such\n// as if restoring from a backup. We will use the current Raft configuration,\n// not the one from the snapshot, so that we can restore into a new cluster. We\n// will also use the higher of the index of the snapshot, or the current index,\n// and then add 1 to that, so we force a new state with a hole in the Raft log,\n// so that the snapshot will be sent to followers and used for any new joiners.\n// This can only be run on the leader, and returns a future that can be used to\n// block until complete.\nfunc (r *Raft) restoreUserSnapshot(meta *SnapshotMeta, reader io.Reader) error {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"restoreUserSnapshot\"}, time.Now())\n\n\t// Sanity check the version.\n\tversion := meta.Version\n\tif version < SnapshotVersionMin || version > SnapshotVersionMax {\n\t\treturn fmt.Errorf(\"unsupported snapshot version %d\", version)\n\t}\n\n\t// We don't support snapshots while there's a config change\n\t// outstanding since the snapshot doesn't have a means to\n\t// represent this state.\n\tcommittedIndex := r.configurations.committedIndex\n\tlatestIndex := r.configurations.latestIndex\n\tif committedIndex != latestIndex {\n\t\treturn fmt.Errorf(\"cannot restore snapshot now, wait until the configuration entry at %v has been applied (have applied %v)\",\n\t\t\tlatestIndex, committedIndex)\n\t}\n\n\t// Cancel any inflight requests.\n\tfor {\n\t\te := r.leaderState.inflight.Front()\n\t\tif e == nil {\n\t\t\tbreak\n\t\t}\n\t\te.Value.(*logFuture).respond(ErrAbortedByRestore)\n\t\tr.leaderState.inflight.Remove(e)\n\t}\n\n\t// We will overwrite the snapshot metadata with the current 
term,\n\t// an index that's greater than the current index, or the last\n\t// index in the snapshot. It's important that we leave a hole in\n\t// the index so we know there's nothing in the Raft log there and\n\t// replication will fault and send the snapshot.\n\tterm := r.getCurrentTerm()\n\tlastIndex := r.getLastIndex()\n\tif meta.Index > lastIndex {\n\t\tlastIndex = meta.Index\n\t}\n\tlastIndex++\n\n\t// Dump the snapshot. Note that we use the latest configuration,\n\t// not the one that came with the snapshot.\n\tsink, err := r.snapshots.Create(version, lastIndex, term,\n\t\tr.configurations.latest, r.configurations.latestIndex, r.trans)\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to create snapshot: %v\", err)\n\t}\n\tn, err := io.Copy(sink, reader)\n\tif err != nil {\n\t\t_ = sink.Cancel()\n\t\treturn fmt.Errorf(\"failed to write snapshot: %v\", err)\n\t}\n\tif n != meta.Size {\n\t\t_ = sink.Cancel()\n\t\treturn fmt.Errorf(\"failed to write snapshot, size didn't match (%d != %d)\", n, meta.Size)\n\t}\n\tif err := sink.Close(); err != nil {\n\t\treturn fmt.Errorf(\"failed to close snapshot: %v\", err)\n\t}\n\tr.logger.Info(\"copied to local snapshot\", \"bytes\", n)\n\n\t// Restore the snapshot into the FSM. If this fails we are in a\n\t// bad state so we panic to take ourselves out.\n\tfsm := &restoreFuture{ID: sink.ID()}\n\tfsm.ShutdownCh = r.shutdownCh\n\tfsm.init()\n\tselect {\n\tcase r.fsmMutateCh <- fsm:\n\tcase <-r.shutdownCh:\n\t\treturn ErrRaftShutdown\n\t}\n\tif err := fsm.Error(); err != nil {\n\t\tpanic(fmt.Errorf(\"failed to restore snapshot: %v\", err))\n\t}\n\n\t// We set the last log so it looks like we've stored the empty\n\t// index we burned. 
The last applied is set because we made the\n\t// FSM take the snapshot state, and we store the last snapshot\n\t// in the stable store since we created a snapshot as part of\n\t// this process.\n\tr.setLastLog(lastIndex, term)\n\tr.setLastApplied(lastIndex)\n\tr.setLastSnapshot(lastIndex, term)\n\n\t// Remove old logs if r.logs is a MonotonicLogStore. Log any errors and continue.\n\tif logs, ok := r.logs.(MonotonicLogStore); ok && logs.IsMonotonic() {\n\t\tif err := r.removeOldLogs(); err != nil {\n\t\t\tr.logger.Error(\"failed to remove old logs\", \"error\", err)\n\t\t}\n\t}\n\n\tr.logger.Info(\"restored user snapshot\", \"index\", lastIndex)\n\treturn nil\n}\n\n// appendConfigurationEntry changes the configuration and adds a new\n// configuration entry to the log. This must only be called from the\n// main thread.\nfunc (r *Raft) appendConfigurationEntry(future *configurationChangeFuture) {\n\tconfiguration, err := nextConfiguration(r.configurations.latest, r.configurations.latestIndex, future.req)\n\tif err != nil {\n\t\tfuture.respond(err)\n\t\treturn\n\t}\n\n\tr.logger.Info(\"updating configuration\",\n\t\t\"command\", future.req.command,\n\t\t\"server-id\", future.req.serverID,\n\t\t\"server-addr\", future.req.serverAddress,\n\t\t\"servers\", hclog.Fmt(\"%+v\", configuration.Servers))\n\n\t// In pre-ID compatibility mode we translate all configuration changes\n\t// in to an old remove peer message, which can handle all supported\n\t// cases for peer changes in the pre-ID world (adding and removing\n\t// voters). Both add peer and remove peer log entries are handled\n\t// similarly on old Raft servers, but remove peer does extra checks to\n\t// see if a leader needs to step down. 
Since they both assert the full\n\t// configuration, then we can safely call remove peer for everything.\n\tif r.protocolVersion < 2 {\n\t\tfuture.log = Log{\n\t\t\tType: LogRemovePeerDeprecated,\n\t\t\tData: encodePeers(configuration, r.trans),\n\t\t}\n\t} else {\n\t\tfuture.log = Log{\n\t\t\tType: LogConfiguration,\n\t\t\tData: EncodeConfiguration(configuration),\n\t\t}\n\t}\n\n\tr.dispatchLogs([]*logFuture{&future.logFuture})\n\tindex := future.Index()\n\tr.setLatestConfiguration(configuration, index)\n\tr.leaderState.commitment.setConfiguration(configuration)\n\tr.startStopReplication()\n}\n\n// dispatchLogs is called on the leader to push logs to disk, mark them\n// as inflight and begin replication of them.\nfunc (r *Raft) dispatchLogs(applyLogs []*logFuture) {\n\tnow := time.Now()\n\tdefer metrics.MeasureSince([]string{\"raft\", \"leader\", \"dispatchLog\"}, now)\n\n\tterm := r.getCurrentTerm()\n\tlastIndex := r.getLastIndex()\n\n\tn := len(applyLogs)\n\tlogs := make([]*Log, n)\n\tmetrics.SetGauge([]string{\"raft\", \"leader\", \"dispatchNumLogs\"}, float32(n))\n\n\tfor idx, applyLog := range applyLogs {\n\t\tapplyLog.dispatch = now\n\t\tlastIndex++\n\t\tapplyLog.log.Index = lastIndex\n\t\tapplyLog.log.Term = term\n\t\tapplyLog.log.AppendedAt = now\n\t\tlogs[idx] = &applyLog.log\n\t\tr.leaderState.inflight.PushBack(applyLog)\n\t}\n\n\t// Write the log entry locally\n\tif err := r.logs.StoreLogs(logs); err != nil {\n\t\tr.logger.Error(\"failed to commit logs\", \"error\", err)\n\t\tfor _, applyLog := range applyLogs {\n\t\t\tapplyLog.respond(err)\n\t\t}\n\t\tr.setState(Follower)\n\t\treturn\n\t}\n\tr.leaderState.commitment.match(r.localID, lastIndex)\n\n\t// Update the last log since it's on disk now\n\tr.setLastLog(lastIndex, term)\n\n\t// Notify the replicators of the new log\n\tfor _, f := range r.leaderState.replState {\n\t\tasyncNotifyCh(f.triggerCh)\n\t}\n}\n\n// processLogs is used to apply all the committed entries that haven't been\n// applied up to the 
given index limit.\n// This can be called from both leaders and followers.\n// Followers call this from AppendEntries, for n entries at a time, and always\n// pass futures=nil.\n// Leaders call this when entries are committed. They pass the futures from any\n// inflight logs.\nfunc (r *Raft) processLogs(index uint64, futures map[uint64]*logFuture) {\n\t// Reject logs we've applied already\n\tlastApplied := r.getLastApplied()\n\tif index <= lastApplied {\n\t\tr.logger.Warn(\"skipping application of old log\", \"index\", index)\n\t\treturn\n\t}\n\n\tapplyBatch := func(batch []*commitTuple) {\n\t\tselect {\n\t\tcase r.fsmMutateCh <- batch:\n\t\tcase <-r.shutdownCh:\n\t\t\tfor _, cl := range batch {\n\t\t\t\tif cl.future != nil {\n\t\t\t\t\tcl.future.respond(ErrRaftShutdown)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Store maxAppendEntries for this call in case it ever becomes reloadable. We\n\t// need to use the same value for all lines here to get the expected result.\n\tmaxAppendEntries := r.config().MaxAppendEntries\n\n\tbatch := make([]*commitTuple, 0, maxAppendEntries)\n\n\t// Apply all the preceding logs\n\tfor idx := lastApplied + 1; idx <= index; idx++ {\n\t\tvar preparedLog *commitTuple\n\t\t// Get the log, either from the future or from our log store\n\t\tfuture, futureOk := futures[idx]\n\t\tif futureOk {\n\t\t\tpreparedLog = r.prepareLog(&future.log, future)\n\t\t} else {\n\t\t\tl := new(Log)\n\t\t\tif err := r.logs.GetLog(idx, l); err != nil {\n\t\t\t\tr.logger.Error(\"failed to get log\", \"index\", idx, \"error\", err)\n\t\t\t\tpanic(err)\n\t\t\t}\n\t\t\tpreparedLog = r.prepareLog(l, nil)\n\t\t}\n\n\t\tswitch {\n\t\tcase preparedLog != nil:\n\t\t\t// If we have a log ready to send to the FSM add it to the batch.\n\t\t\t// The FSM thread will respond to the future.\n\t\t\tbatch = append(batch, preparedLog)\n\n\t\t\t// If we have filled up a batch, send it to the FSM\n\t\t\tif len(batch) >= maxAppendEntries {\n\t\t\t\tapplyBatch(batch)\n\t\t\t\tbatch = 
make([]*commitTuple, 0, maxAppendEntries)\n\t\t\t}\n\n\t\tcase futureOk:\n\t\t\t// Invoke the future if given.\n\t\t\tfuture.respond(nil)\n\t\t}\n\t}\n\n\t// If there are any remaining logs in the batch apply them\n\tif len(batch) != 0 {\n\t\tapplyBatch(batch)\n\t}\n\n\t// Update the lastApplied index and term\n\tr.setLastApplied(index)\n}\n\n// prepareLog is invoked to prepare a single committed log entry for application.\nfunc (r *Raft) prepareLog(l *Log, future *logFuture) *commitTuple {\n\tswitch l.Type {\n\tcase LogBarrier:\n\t\t// Barrier is handled by the FSM\n\t\tfallthrough\n\n\tcase LogCommand:\n\t\treturn &commitTuple{l, future}\n\n\tcase LogConfiguration:\n\t\t// Only support this with the v2 configuration format\n\t\tif r.protocolVersion > 2 {\n\t\t\treturn &commitTuple{l, future}\n\t\t}\n\tcase LogAddPeerDeprecated:\n\tcase LogRemovePeerDeprecated:\n\tcase LogNoop:\n\t\t// Ignore the no-op\n\n\tdefault:\n\t\tpanic(fmt.Errorf(\"unrecognized log type: %#v\", l))\n\t}\n\n\treturn nil\n}\n\n// processRPC is called to handle an incoming RPC request. This must only be\n// called from the main thread.\nfunc (r *Raft) processRPC(rpc RPC) {\n\tif err := r.checkRPCHeader(rpc); err != nil {\n\t\trpc.Respond(nil, err)\n\t\treturn\n\t}\n\n\tswitch cmd := rpc.Command.(type) {\n\tcase *AppendEntriesRequest:\n\t\tr.appendEntries(rpc, cmd)\n\tcase *RequestVoteRequest:\n\t\tr.requestVote(rpc, cmd)\n\tcase *RequestPreVoteRequest:\n\t\tr.requestPreVote(rpc, cmd)\n\tcase *InstallSnapshotRequest:\n\t\tr.installSnapshot(rpc, cmd)\n\tcase *TimeoutNowRequest:\n\t\tr.timeoutNow(rpc, cmd)\n\tdefault:\n\t\tr.logger.Error(\"got unexpected command\",\n\t\t\t\"command\", hclog.Fmt(\"%#v\", rpc.Command))\n\n\t\trpc.Respond(nil, errors.New(rpcUnexpectedCommandError))\n\t}\n}\n\n// processHeartbeat is a special handler used just for heartbeat requests\n// so that they can be fast-pathed if a transport supports it. 
This must only\n// be called from the main thread.\nfunc (r *Raft) processHeartbeat(rpc RPC) {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"rpc\", \"processHeartbeat\"}, time.Now())\n\n\t// Check if we are shutdown, just ignore the RPC\n\tselect {\n\tcase <-r.shutdownCh:\n\t\treturn\n\tdefault:\n\t}\n\n\t// Ensure we are only handling a heartbeat\n\tswitch cmd := rpc.Command.(type) {\n\tcase *AppendEntriesRequest:\n\t\tr.appendEntries(rpc, cmd)\n\tdefault:\n\t\tr.logger.Error(\"expected heartbeat, got\", \"command\", hclog.Fmt(\"%#v\", rpc.Command))\n\t\trpc.Respond(nil, fmt.Errorf(\"unexpected command\"))\n\t}\n}\n\n// appendEntries is invoked when we get an append entries RPC call. This must\n// only be called from the main thread.\nfunc (r *Raft) appendEntries(rpc RPC, a *AppendEntriesRequest) {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"rpc\", \"appendEntries\"}, time.Now())\n\t// Setup a response\n\tresp := &AppendEntriesResponse{\n\t\tRPCHeader:      r.getRPCHeader(),\n\t\tTerm:           r.getCurrentTerm(),\n\t\tLastLog:        r.getLastIndex(),\n\t\tSuccess:        false,\n\t\tNoRetryBackoff: false,\n\t}\n\tvar rpcErr error\n\tdefer func() {\n\t\trpc.Respond(resp, rpcErr)\n\t}()\n\n\t// Ignore an older term\n\tif a.Term < r.getCurrentTerm() {\n\t\treturn\n\t}\n\n\t// Increase the term if we see a newer one, also transition to follower\n\t// if we ever get an appendEntries call\n\tif a.Term > r.getCurrentTerm() || (r.getState() != Follower && !r.candidateFromLeadershipTransfer.Load()) {\n\t\t// Ensure transition to follower\n\t\tr.setState(Follower)\n\t\tr.setCurrentTerm(a.Term)\n\t\tresp.Term = a.Term\n\t}\n\n\t// Save the current leader\n\tif len(a.Addr) > 0 {\n\t\tr.setLeader(r.trans.DecodePeer(a.Addr), ServerID(a.ID))\n\t} else {\n\t\tr.setLeader(r.trans.DecodePeer(a.Leader), ServerID(a.ID))\n\t}\n\t// Verify the last log entry\n\tif a.PrevLogEntry > 0 {\n\t\tlastIdx, lastTerm := r.getLastEntry()\n\n\t\tvar prevLogTerm uint64\n\t\tif 
a.PrevLogEntry == lastIdx {\n\t\t\tprevLogTerm = lastTerm\n\t\t} else {\n\t\t\tvar prevLog Log\n\t\t\tif err := r.logs.GetLog(a.PrevLogEntry, &prevLog); err != nil {\n\t\t\t\tr.logger.Warn(\"failed to get previous log\",\n\t\t\t\t\t\"previous-index\", a.PrevLogEntry,\n\t\t\t\t\t\"last-index\", lastIdx,\n\t\t\t\t\t\"error\", err)\n\t\t\t\tresp.NoRetryBackoff = true\n\t\t\t\treturn\n\t\t\t}\n\t\t\tprevLogTerm = prevLog.Term\n\t\t}\n\n\t\tif a.PrevLogTerm != prevLogTerm {\n\t\t\tr.logger.Warn(\"previous log term mis-match\",\n\t\t\t\t\"ours\", prevLogTerm,\n\t\t\t\t\"remote\", a.PrevLogTerm)\n\t\t\tresp.NoRetryBackoff = true\n\t\t\treturn\n\t\t}\n\t}\n\n\t// Process any new entries\n\tif len(a.Entries) > 0 {\n\t\tstart := time.Now()\n\n\t\t// Delete any conflicting entries, skip any duplicates\n\t\tlastLogIdx, _ := r.getLastLog()\n\t\tvar newEntries []*Log\n\t\tfor i, entry := range a.Entries {\n\t\t\tif entry.Index > lastLogIdx {\n\t\t\t\tnewEntries = a.Entries[i:]\n\t\t\t\tbreak\n\t\t\t}\n\t\t\tvar storeEntry Log\n\t\t\tif err := r.logs.GetLog(entry.Index, &storeEntry); err != nil {\n\t\t\t\tr.logger.Warn(\"failed to get log entry\",\n\t\t\t\t\t\"index\", entry.Index,\n\t\t\t\t\t\"error\", err)\n\t\t\t\treturn\n\t\t\t}\n\t\t\tif entry.Term != storeEntry.Term {\n\t\t\t\tr.logger.Warn(\"clearing log suffix\", \"from\", entry.Index, \"to\", lastLogIdx)\n\t\t\t\tif err := r.logs.DeleteRange(entry.Index, lastLogIdx); err != nil {\n\t\t\t\t\tr.logger.Error(\"failed to clear log suffix\", \"error\", err)\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t\tif entry.Index <= r.configurations.latestIndex {\n\t\t\t\t\tr.setLatestConfiguration(r.configurations.committed, r.configurations.committedIndex)\n\t\t\t\t}\n\t\t\t\tnewEntries = a.Entries[i:]\n\t\t\t\tbreak\n\t\t\t}\n\t\t}\n\n\t\tif n := len(newEntries); n > 0 {\n\t\t\t// Append the new entries\n\t\t\tif err := r.logs.StoreLogs(newEntries); err != nil {\n\t\t\t\tr.logger.Error(\"failed to append to logs\", \"error\", err)\n\t\t\t\t// 
TODO: leaving r.getLastLog() in the wrong\n\t\t\t\t// state if there was a truncation above\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\t// Handle any new configuration changes\n\t\t\tfor _, newEntry := range newEntries {\n\t\t\t\tif err := r.processConfigurationLogEntry(newEntry); err != nil {\n\t\t\t\t\tr.logger.Warn(\"failed to append entry\",\n\t\t\t\t\t\t\"index\", newEntry.Index,\n\t\t\t\t\t\t\"error\", err)\n\t\t\t\t\trpcErr = err\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t}\n\n\t\t\t// Update the lastLog\n\t\t\tlast := newEntries[n-1]\n\t\t\tr.setLastLog(last.Index, last.Term)\n\t\t}\n\n\t\tmetrics.MeasureSince([]string{\"raft\", \"rpc\", \"appendEntries\", \"storeLogs\"}, start)\n\t}\n\n\t// Update the commit index\n\tif a.LeaderCommitIndex > 0 && a.LeaderCommitIndex > r.getCommitIndex() {\n\t\tstart := time.Now()\n\t\tidx := min(a.LeaderCommitIndex, r.getLastIndex())\n\t\tr.setCommitIndex(idx)\n\t\tif r.configurations.latestIndex <= idx {\n\t\t\tr.setCommittedConfiguration(r.configurations.latest, r.configurations.latestIndex)\n\t\t}\n\t\tr.processLogs(idx, nil)\n\t\tmetrics.MeasureSince([]string{\"raft\", \"rpc\", \"appendEntries\", \"processLogs\"}, start)\n\t}\n\n\t// Everything went well, set success\n\tresp.Success = true\n\tr.setLastContact()\n}\n\n// processConfigurationLogEntry takes a log entry and updates the latest\n// configuration if the entry results in a new configuration. 
This must only be\n// called from the main thread, or from NewRaft() before any threads have begun.\nfunc (r *Raft) processConfigurationLogEntry(entry *Log) error {\n\tswitch entry.Type {\n\tcase LogConfiguration:\n\t\tr.setCommittedConfiguration(r.configurations.latest, r.configurations.latestIndex)\n\t\tr.setLatestConfiguration(DecodeConfiguration(entry.Data), entry.Index)\n\n\tcase LogAddPeerDeprecated, LogRemovePeerDeprecated:\n\t\tr.setCommittedConfiguration(r.configurations.latest, r.configurations.latestIndex)\n\t\tconf, err := decodePeers(entry.Data, r.trans)\n\t\tif err != nil {\n\t\t\treturn err\n\t\t}\n\t\tr.setLatestConfiguration(conf, entry.Index)\n\t}\n\treturn nil\n}\n\n// requestVote is invoked when we get a request vote RPC call.\nfunc (r *Raft) requestVote(rpc RPC, req *RequestVoteRequest) {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"rpc\", \"requestVote\"}, time.Now())\n\tr.observe(*req)\n\n\t// Setup a response\n\tresp := &RequestVoteResponse{\n\t\tRPCHeader: r.getRPCHeader(),\n\t\tTerm:      r.getCurrentTerm(),\n\t\tGranted:   false,\n\t}\n\tvar rpcErr error\n\tdefer func() {\n\t\trpc.Respond(resp, rpcErr)\n\t}()\n\n\t// Version 0 servers will panic unless the peers is present. It's only\n\t// used on them to produce a warning message.\n\tif r.protocolVersion < 2 {\n\t\tresp.Peers = encodePeers(r.configurations.latest, r.trans)\n\t}\n\n\t// Check if we have an existing leader [who's not the candidate] and also\n\t// check the LeadershipTransfer flag is set. Usually votes are rejected if\n\t// there is a known leader. 
But if the leader initiated a leadership transfer,\n\t// vote!\n\tvar candidate ServerAddress\n\tvar candidateBytes []byte\n\tif len(req.Addr) > 0 {\n\t\tcandidate = r.trans.DecodePeer(req.Addr)\n\t\tcandidateBytes = req.Addr\n\t} else {\n\t\tcandidate = r.trans.DecodePeer(req.Candidate)\n\t\tcandidateBytes = req.Candidate\n\t}\n\n\t// For older raft version ID is not part of the packed message\n\t// We assume that the peer is part of the configuration and skip this check\n\tif len(req.ID) > 0 {\n\t\tcandidateID := ServerID(req.ID)\n\t\t// if the Servers list is empty that mean the cluster is very likely trying to bootstrap,\n\t\t// Grant the vote\n\t\tif len(r.configurations.latest.Servers) > 0 && !inConfiguration(r.configurations.latest, candidateID) {\n\t\t\tr.logger.Warn(\"rejecting vote request since node is not in configuration\",\n\t\t\t\t\"from\", candidate)\n\t\t\treturn\n\t\t}\n\t}\n\tif leaderAddr, leaderID := r.LeaderWithID(); leaderAddr != \"\" && leaderAddr != candidate && !req.LeadershipTransfer {\n\t\tr.logger.Warn(\"rejecting vote request since we have a leader\",\n\t\t\t\"from\", candidate,\n\t\t\t\"leader\", leaderAddr,\n\t\t\t\"leader-id\", string(leaderID))\n\t\treturn\n\t}\n\n\t// Ignore an older term\n\tif req.Term < r.getCurrentTerm() {\n\t\treturn\n\t}\n\n\t// Increase the term if we see a newer one\n\tif req.Term > r.getCurrentTerm() {\n\t\t// Ensure transition to follower\n\t\tr.logger.Debug(\"lost leadership because received a requestVote with a newer term\")\n\t\tr.setState(Follower)\n\t\tr.setCurrentTerm(req.Term)\n\n\t\tresp.Term = req.Term\n\t}\n\n\t// if we get a request for vote from a nonVoter  and the request term is higher,\n\t// step down and update term, but reject the vote request\n\t// This could happen when a node, previously voter, is converted to non-voter\n\t// The reason we need to step in is to permit to the cluster to make progress in such a scenario\n\t// More details about that in 
https://github.com/hashicorp/raft/pull/526\n\tif len(req.ID) > 0 {\n\t\tcandidateID := ServerID(req.ID)\n\t\tif len(r.configurations.latest.Servers) > 0 && !hasVote(r.configurations.latest, candidateID) {\n\t\t\tr.logger.Warn(\"rejecting vote request since node is not a voter\", \"from\", candidate)\n\t\t\treturn\n\t\t}\n\t}\n\t// Check if we have voted yet\n\tlastVoteTerm, err := r.stable.GetUint64(keyLastVoteTerm)\n\tif err != nil && err.Error() != \"not found\" {\n\t\tr.logger.Error(\"failed to get last vote term\", \"error\", err)\n\t\treturn\n\t}\n\tlastVoteCandBytes, err := r.stable.Get(keyLastVoteCand)\n\tif err != nil && err.Error() != \"not found\" {\n\t\tr.logger.Error(\"failed to get last vote candidate\", \"error\", err)\n\t\treturn\n\t}\n\n\t// Check if we've voted in this election before\n\tif lastVoteTerm == req.Term && lastVoteCandBytes != nil {\n\t\tr.logger.Info(\"duplicate requestVote for same term\", \"term\", req.Term)\n\t\tif bytes.Equal(lastVoteCandBytes, candidateBytes) {\n\t\t\tr.logger.Warn(\"duplicate requestVote from\", \"candidate\", candidate)\n\t\t\tresp.Granted = true\n\t\t}\n\t\treturn\n\t}\n\n\t// Reject if their term is older\n\tlastIdx, lastTerm := r.getLastEntry()\n\tif lastTerm > req.LastLogTerm {\n\t\tr.logger.Warn(\"rejecting vote request since our last term is greater\",\n\t\t\t\"candidate\", candidate,\n\t\t\t\"last-term\", lastTerm,\n\t\t\t\"last-candidate-term\", req.LastLogTerm)\n\t\treturn\n\t}\n\n\tif lastTerm == req.LastLogTerm && lastIdx > req.LastLogIndex {\n\t\tr.logger.Warn(\"rejecting vote request since our last index is greater\",\n\t\t\t\"candidate\", candidate,\n\t\t\t\"last-index\", lastIdx,\n\t\t\t\"last-candidate-index\", req.LastLogIndex)\n\t\treturn\n\t}\n\n\t// Persist a vote for safety\n\tif err := r.persistVote(req.Term, candidateBytes); err != nil {\n\t\tr.logger.Error(\"failed to persist vote\", \"error\", err)\n\t\treturn\n\t}\n\n\tresp.Granted = true\n\tr.setLastContact()\n}\n\n// requestPreVote is 
invoked when we get a request Pre-Vote RPC call.\nfunc (r *Raft) requestPreVote(rpc RPC, req *RequestPreVoteRequest) {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"rpc\", \"requestPreVote\"}, time.Now())\n\tr.observe(*req)\n\n\t// Setup a response\n\tresp := &RequestPreVoteResponse{\n\t\tRPCHeader: r.getRPCHeader(),\n\t\tTerm:      r.getCurrentTerm(),\n\t\tGranted:   false,\n\t}\n\tvar rpcErr error\n\tdefer func() {\n\t\trpc.Respond(resp, rpcErr)\n\t}()\n\n\t// Check if we have an existing leader [who's not the candidate] and also\n\tcandidate := r.trans.DecodePeer(req.GetRPCHeader().Addr)\n\tcandidateID := ServerID(req.ID)\n\n\t// if the Servers list is empty that mean the cluster is very likely trying to bootstrap,\n\t// Grant the vote\n\tif len(r.configurations.latest.Servers) > 0 && !inConfiguration(r.configurations.latest, candidateID) {\n\t\tr.logger.Warn(\"rejecting pre-vote request since node is not in configuration\",\n\t\t\t\"from\", candidate)\n\t\treturn\n\t}\n\n\tif leaderAddr, leaderID := r.LeaderWithID(); leaderAddr != \"\" && leaderAddr != candidate {\n\t\tr.logger.Warn(\"rejecting pre-vote request since we have a leader\",\n\t\t\t\"from\", candidate,\n\t\t\t\"leader\", leaderAddr,\n\t\t\t\"leader-id\", string(leaderID))\n\t\treturn\n\t}\n\n\t// Ignore an older term\n\tif req.Term < r.getCurrentTerm() {\n\t\treturn\n\t}\n\n\tif req.Term > r.getCurrentTerm() {\n\t\t// continue processing here to possibly grant the pre-vote as in a \"real\" vote this will transition us to follower\n\t\tr.logger.Debug(\"received a requestPreVote with a newer term, grant the pre-vote\")\n\t\tresp.Term = req.Term\n\t}\n\n\t// if we get a request for a pre-vote from a nonVoter  and the request term is higher, do not grant the Pre-Vote\n\t// This could happen when a node, previously voter, is converted to non-voter\n\tif len(r.configurations.latest.Servers) > 0 && !hasVote(r.configurations.latest, candidateID) {\n\t\tr.logger.Warn(\"rejecting pre-vote request since 
node is not a voter\", \"from\", candidate)\n\t\treturn\n\t}\n\n\t// Reject if their term is older\n\tlastIdx, lastTerm := r.getLastEntry()\n\tif lastTerm > req.LastLogTerm {\n\t\tr.logger.Warn(\"rejecting pre-vote request since our last term is greater\",\n\t\t\t\"candidate\", candidate,\n\t\t\t\"last-term\", lastTerm,\n\t\t\t\"last-candidate-term\", req.LastLogTerm)\n\t\treturn\n\t}\n\n\tif lastTerm == req.LastLogTerm && lastIdx > req.LastLogIndex {\n\t\tr.logger.Warn(\"rejecting pre-vote request since our last index is greater\",\n\t\t\t\"candidate\", candidate,\n\t\t\t\"last-index\", lastIdx,\n\t\t\t\"last-candidate-index\", req.LastLogIndex)\n\t\treturn\n\t}\n\n\tresp.Granted = true\n}\n\n// installSnapshot is invoked when we get a InstallSnapshot RPC call.\n// We must be in the follower state for this, since it means we are\n// too far behind a leader for log replay. This must only be called\n// from the main thread.\nfunc (r *Raft) installSnapshot(rpc RPC, req *InstallSnapshotRequest) {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"rpc\", \"installSnapshot\"}, time.Now())\n\t// Setup a response\n\tresp := &InstallSnapshotResponse{\n\t\tTerm:    r.getCurrentTerm(),\n\t\tSuccess: false,\n\t}\n\tvar rpcErr error\n\tdefer func() {\n\t\t_, _ = io.Copy(io.Discard, rpc.Reader) // ensure we always consume all the snapshot data from the stream [see issue #212]\n\t\trpc.Respond(resp, rpcErr)\n\t}()\n\n\t// Sanity check the version\n\tif req.SnapshotVersion < SnapshotVersionMin ||\n\t\treq.SnapshotVersion > SnapshotVersionMax {\n\t\trpcErr = fmt.Errorf(\"unsupported snapshot version %d\", req.SnapshotVersion)\n\t\treturn\n\t}\n\n\t// Ignore an older term\n\tif req.Term < r.getCurrentTerm() {\n\t\tr.logger.Info(\"ignoring installSnapshot request with older term than current term\",\n\t\t\t\"request-term\", req.Term,\n\t\t\t\"current-term\", r.getCurrentTerm())\n\t\treturn\n\t}\n\n\t// Increase the term if we see a newer one\n\tif req.Term > r.getCurrentTerm() 
{\n\t\t// Ensure transition to follower\n\t\tr.setState(Follower)\n\t\tr.setCurrentTerm(req.Term)\n\t\tresp.Term = req.Term\n\t}\n\n\t// Save the current leader\n\tif len(req.ID) > 0 {\n\t\tr.setLeader(r.trans.DecodePeer(req.Addr), ServerID(req.ID))\n\t} else {\n\t\tr.setLeader(r.trans.DecodePeer(req.Leader), ServerID(req.ID))\n\t}\n\n\t// Create a new snapshot\n\tvar reqConfiguration Configuration\n\tvar reqConfigurationIndex uint64\n\tif req.SnapshotVersion > 0 {\n\t\treqConfiguration = DecodeConfiguration(req.Configuration)\n\t\treqConfigurationIndex = req.ConfigurationIndex\n\t} else {\n\t\treqConfiguration, rpcErr = decodePeers(req.Peers, r.trans)\n\t\tif rpcErr != nil {\n\t\t\tr.logger.Error(\"failed to install snapshot\", \"error\", rpcErr)\n\t\t\treturn\n\t\t}\n\t\treqConfigurationIndex = req.LastLogIndex\n\t}\n\tversion := getSnapshotVersion(r.protocolVersion)\n\tsink, err := r.snapshots.Create(version, req.LastLogIndex, req.LastLogTerm,\n\t\treqConfiguration, reqConfigurationIndex, r.trans)\n\tif err != nil {\n\t\tr.logger.Error(\"failed to create snapshot to install\", \"error\", err)\n\t\trpcErr = fmt.Errorf(\"failed to create snapshot: %v\", err)\n\t\treturn\n\t}\n\n\t// Separately track the progress of streaming a snapshot over the network\n\t// because this too can take a long time.\n\tcountingRPCReader := newCountingReader(rpc.Reader)\n\n\t// Spill the remote snapshot to disk\n\ttransferMonitor := startSnapshotRestoreMonitor(r.logger, countingRPCReader, req.Size, true)\n\tn, err := io.Copy(sink, countingRPCReader)\n\ttransferMonitor.StopAndWait()\n\tif err != nil {\n\t\t_ = sink.Cancel()\n\t\tr.logger.Error(\"failed to copy snapshot\", \"error\", err)\n\t\trpcErr = err\n\t\treturn\n\t}\n\n\t// Check that we received it all\n\tif n != req.Size {\n\t\t_ = sink.Cancel()\n\t\tr.logger.Error(\"failed to receive whole snapshot\",\n\t\t\t\"received\", hclog.Fmt(\"%d / %d\", n, req.Size))\n\t\trpcErr = fmt.Errorf(\"short read\")\n\t\treturn\n\t}\n\n\t// 
Finalize the snapshot\n\tif err := sink.Close(); err != nil {\n\t\tr.logger.Error(\"failed to finalize snapshot\", \"error\", err)\n\t\trpcErr = err\n\t\treturn\n\t}\n\tr.logger.Info(\"copied to local snapshot\", \"bytes\", n)\n\n\t// Restore snapshot\n\tfuture := &restoreFuture{ID: sink.ID()}\n\tfuture.ShutdownCh = r.shutdownCh\n\tfuture.init()\n\tselect {\n\tcase r.fsmMutateCh <- future:\n\tcase <-r.shutdownCh:\n\t\tfuture.respond(ErrRaftShutdown)\n\t\treturn\n\t}\n\n\t// Wait for the restore to happen\n\tif err := future.Error(); err != nil {\n\t\tr.logger.Error(\"failed to restore snapshot\", \"error\", err)\n\t\trpcErr = err\n\t\treturn\n\t}\n\n\t// Update the lastApplied so we don't replay old logs\n\tr.setLastApplied(req.LastLogIndex)\n\n\t// Update the last stable snapshot info\n\tr.setLastSnapshot(req.LastLogIndex, req.LastLogTerm)\n\n\t// Restore the peer set\n\tr.setLatestConfiguration(reqConfiguration, reqConfigurationIndex)\n\tr.setCommittedConfiguration(reqConfiguration, reqConfigurationIndex)\n\n\t// Clear old logs if r.logs is a MonotonicLogStore. Otherwise compact the\n\t// logs. 
In both cases, log any errors and continue.\n\tif mlogs, ok := r.logs.(MonotonicLogStore); ok && mlogs.IsMonotonic() {\n\t\tif err := r.removeOldLogs(); err != nil {\n\t\t\tr.logger.Error(\"failed to reset logs\", \"error\", err)\n\t\t}\n\t} else if err := r.compactLogs(req.LastLogIndex); err != nil {\n\t\tr.logger.Error(\"failed to compact logs\", \"error\", err)\n\t}\n\n\tr.logger.Info(\"Installed remote snapshot\")\n\tresp.Success = true\n\tr.setLastContact()\n}\n\n// setLastContact is used to set the last contact time to now\nfunc (r *Raft) setLastContact() {\n\tr.lastContactLock.Lock()\n\tr.lastContact = time.Now()\n\tr.lastContactLock.Unlock()\n}\n\ntype voteResult struct {\n\tRequestVoteResponse\n\tvoterID ServerID\n}\n\ntype preVoteResult struct {\n\tRequestPreVoteResponse\n\tvoterID ServerID\n}\n\n// electSelf is used to send a RequestVote RPC to all peers, and vote for\n// ourself. This has the side affecting of incrementing the current term. The\n// response channel returned is used to wait for all the responses (including a\n// vote for ourself). 
This must only be called from the main thread.\nfunc (r *Raft) electSelf() <-chan *voteResult {\n\t// Create a response channel\n\trespCh := make(chan *voteResult, len(r.configurations.latest.Servers))\n\n\t// Increment the term\n\tnewTerm := r.getCurrentTerm() + 1\n\n\tr.setCurrentTerm(newTerm)\n\t// Construct the request\n\tlastIdx, lastTerm := r.getLastEntry()\n\treq := &RequestVoteRequest{\n\t\tRPCHeader: r.getRPCHeader(),\n\t\tTerm:      newTerm,\n\t\t// this is needed for retro compatibility, before RPCHeader.Addr was added\n\t\tCandidate:          r.trans.EncodePeer(r.localID, r.localAddr),\n\t\tLastLogIndex:       lastIdx,\n\t\tLastLogTerm:        lastTerm,\n\t\tLeadershipTransfer: r.candidateFromLeadershipTransfer.Load(),\n\t}\n\n\t// Construct a function to ask for a vote\n\taskPeer := func(peer Server) {\n\t\tr.goFunc(func() {\n\t\t\tdefer metrics.MeasureSince([]string{\"raft\", \"candidate\", \"electSelf\"}, time.Now())\n\t\t\tresp := &voteResult{voterID: peer.ID}\n\t\t\terr := r.trans.RequestVote(peer.ID, peer.Address, req, &resp.RequestVoteResponse)\n\t\t\tif err != nil {\n\t\t\t\tr.logger.Error(\"failed to make requestVote RPC\",\n\t\t\t\t\t\"target\", peer,\n\t\t\t\t\t\"error\", err,\n\t\t\t\t\t\"term\", req.Term)\n\t\t\t\tresp.Term = req.Term\n\t\t\t\tresp.Granted = false\n\t\t\t}\n\t\t\trespCh <- resp\n\t\t})\n\t}\n\n\t// For each peer, request a vote\n\tfor _, server := range r.configurations.latest.Servers {\n\t\tif server.Suffrage == Voter {\n\t\t\tif server.ID == r.localID {\n\t\t\t\tr.logger.Debug(\"voting for self\", \"term\", req.Term, \"id\", r.localID)\n\n\t\t\t\t// Persist a vote for ourselves\n\t\t\t\tif err := r.persistVote(req.Term, req.Addr); err != nil {\n\t\t\t\t\tr.logger.Error(\"failed to persist vote\", \"error\", err)\n\t\t\t\t\treturn nil\n\n\t\t\t\t}\n\t\t\t\t// Include our own vote\n\t\t\t\trespCh <- &voteResult{\n\t\t\t\t\tRequestVoteResponse: RequestVoteResponse{\n\t\t\t\t\t\tRPCHeader: r.getRPCHeader(),\n\t\t\t\t\t\tTerm: 
     req.Term,\n\t\t\t\t\t\tGranted:   true,\n\t\t\t\t\t},\n\t\t\t\t\tvoterID: r.localID,\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tr.logger.Debug(\"asking for vote\", \"term\", req.Term, \"from\", server.ID, \"address\", server.Address)\n\t\t\t\taskPeer(server)\n\t\t\t}\n\t\t}\n\t}\n\n\treturn respCh\n}\n\n// preElectSelf is used to send a RequestPreVote RPC to all peers, and vote for\n// ourself. This will not increment the current term. The\n// response channel returned is used to wait for all the responses (including a\n// vote for ourself).\n// This must only be called from the main thread.\nfunc (r *Raft) preElectSelf() <-chan *preVoteResult {\n\n\t// At this point transport should support pre-vote\n\t// but check just in case\n\tprevoteTrans, prevoteTransSupported := r.trans.(WithPreVote)\n\tif !prevoteTransSupported {\n\t\tpanic(\"preElection is not possible if the transport don't support pre-vote\")\n\t}\n\n\t// Create a response channel\n\trespCh := make(chan *preVoteResult, len(r.configurations.latest.Servers))\n\n\t// Propose the next term without actually changing our state\n\tnewTerm := r.getCurrentTerm() + 1\n\n\t// Construct the request\n\tlastIdx, lastTerm := r.getLastEntry()\n\treq := &RequestPreVoteRequest{\n\t\tRPCHeader:    r.getRPCHeader(),\n\t\tTerm:         newTerm,\n\t\tLastLogIndex: lastIdx,\n\t\tLastLogTerm:  lastTerm,\n\t}\n\n\t// Construct a function to ask for a vote\n\taskPeer := func(peer Server) {\n\t\tr.goFunc(func() {\n\t\t\tdefer metrics.MeasureSince([]string{\"raft\", \"candidate\", \"preElectSelf\"}, time.Now())\n\t\t\tresp := &preVoteResult{voterID: peer.ID}\n\n\t\t\terr := prevoteTrans.RequestPreVote(peer.ID, peer.Address, req, &resp.RequestPreVoteResponse)\n\n\t\t\t// If the target server do not support Pre-vote RPC we count this as a granted vote to allow\n\t\t\t// the cluster to progress.\n\t\t\tif err != nil && strings.Contains(err.Error(), rpcUnexpectedCommandError) {\n\t\t\t\tr.logger.Error(\"target does not support pre-vote 
RPC, treating as granted\",\n\t\t\t\t\t\"target\", peer,\n\t\t\t\t\t\"error\", err,\n\t\t\t\t\t\"term\", req.Term)\n\t\t\t\tresp.Term = req.Term\n\t\t\t\tresp.Granted = true\n\t\t\t} else if err != nil {\n\t\t\t\tr.logger.Error(\"failed to make requestVote RPC\",\n\t\t\t\t\t\"target\", peer,\n\t\t\t\t\t\"error\", err,\n\t\t\t\t\t\"term\", req.Term)\n\t\t\t\tresp.Term = req.Term\n\t\t\t\tresp.Granted = false\n\t\t\t}\n\t\t\trespCh <- resp\n\n\t\t})\n\t}\n\n\t// For each peer, request a vote\n\tfor _, server := range r.configurations.latest.Servers {\n\t\tif server.Suffrage == Voter {\n\t\t\tif server.ID == r.localID {\n\t\t\t\tr.logger.Debug(\"pre-voting for self\", \"term\", req.Term, \"id\", r.localID)\n\n\t\t\t\t// cast a pre-vote for our self\n\t\t\t\trespCh <- &preVoteResult{\n\t\t\t\t\tRequestPreVoteResponse: RequestPreVoteResponse{\n\t\t\t\t\t\tRPCHeader: r.getRPCHeader(),\n\t\t\t\t\t\tTerm:      req.Term,\n\t\t\t\t\t\tGranted:   true,\n\t\t\t\t\t},\n\t\t\t\t\tvoterID: r.localID,\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\tr.logger.Debug(\"asking for pre-vote\", \"term\", req.Term, \"from\", server.ID, \"address\", server.Address)\n\t\t\t\taskPeer(server)\n\t\t\t}\n\t\t}\n\t}\n\n\treturn respCh\n}\n\n// persistVote is used to persist our vote for safety.\nfunc (r *Raft) persistVote(term uint64, candidate []byte) error {\n\tif err := r.stable.SetUint64(keyLastVoteTerm, term); err != nil {\n\t\treturn err\n\t}\n\tif err := r.stable.Set(keyLastVoteCand, candidate); err != nil {\n\t\treturn err\n\t}\n\treturn nil\n}\n\n// setCurrentTerm is used to set the current term in a durable manner.\nfunc (r *Raft) setCurrentTerm(t uint64) {\n\t// Persist to disk first\n\tif err := r.stable.SetUint64(keyCurrentTerm, t); err != nil {\n\t\tpanic(fmt.Errorf(\"failed to save current term: %v\", err))\n\t}\n\tr.raftState.setCurrentTerm(t)\n}\n\n// setState is used to update the current state. Any state\n// transition causes the known leader to be cleared. 
This means\n// that leader should be set only after updating the state.\nfunc (r *Raft) setState(state RaftState) {\n\tr.setLeader(\"\", \"\")\n\toldState := r.getState()\n\tr.raftState.setState(state)\n\tif oldState != state {\n\t\tr.observe(state)\n\t}\n}\n\n// pickServer returns the follower that is most up to date and participating in quorum.\n// Because it accesses leaderstate, it should only be called from the leaderloop.\nfunc (r *Raft) pickServer() *Server {\n\tvar pick *Server\n\tvar current uint64\n\tfor _, server := range r.configurations.latest.Servers {\n\t\tif server.ID == r.localID || server.Suffrage != Voter {\n\t\t\tcontinue\n\t\t}\n\t\tstate, ok := r.leaderState.replState[server.ID]\n\t\tif !ok {\n\t\t\tcontinue\n\t\t}\n\t\tnextIdx := atomic.LoadUint64(&state.nextIndex)\n\t\tif nextIdx > current {\n\t\t\tcurrent = nextIdx\n\t\t\ttmp := server\n\t\t\tpick = &tmp\n\t\t}\n\t}\n\treturn pick\n}\n\n// initiateLeadershipTransfer starts the leadership on the leader side, by\n// sending a message to the leadershipTransferCh, to make sure it runs in the\n// mainloop.\nfunc (r *Raft) initiateLeadershipTransfer(id *ServerID, address *ServerAddress) LeadershipTransferFuture {\n\tfuture := &leadershipTransferFuture{ID: id, Address: address}\n\tfuture.init()\n\n\tif id != nil && *id == r.localID {\n\t\terr := fmt.Errorf(\"cannot transfer leadership to itself\")\n\t\tr.logger.Info(err.Error())\n\t\tfuture.respond(err)\n\t\treturn future\n\t}\n\n\tselect {\n\tcase r.leadershipTransferCh <- future:\n\t\treturn future\n\tcase <-r.shutdownCh:\n\t\treturn errorFuture{ErrRaftShutdown}\n\tdefault:\n\t\treturn errorFuture{ErrEnqueueTimeout}\n\t}\n}\n\n// timeoutNow is what happens when a server receives a TimeoutNowRequest.\nfunc (r *Raft) timeoutNow(rpc RPC, req *TimeoutNowRequest) {\n\tr.setLeader(\"\", \"\")\n\tr.setState(Candidate)\n\tr.candidateFromLeadershipTransfer.Store(true)\n\trpc.Respond(&TimeoutNowResponse{}, nil)\n}\n\n// setLatestConfiguration stores the 
latest configuration and updates a copy of it.\nfunc (r *Raft) setLatestConfiguration(c Configuration, i uint64) {\n\tr.configurations.latest = c\n\tr.configurations.latestIndex = i\n\tr.latestConfiguration.Store(c.Clone())\n}\n\n// setCommittedConfiguration stores the committed configuration.\nfunc (r *Raft) setCommittedConfiguration(c Configuration, i uint64) {\n\tr.configurations.committed = c\n\tr.configurations.committedIndex = i\n}\n\n// getLatestConfiguration reads the configuration from a copy of the main\n// configuration, which means it can be accessed independently from the main\n// loop.\nfunc (r *Raft) getLatestConfiguration() Configuration {\n\t// this switch catches the case where this is called without having set\n\t// a configuration previously.\n\tswitch c := r.latestConfiguration.Load().(type) {\n\tcase Configuration:\n\t\treturn c\n\tdefault:\n\t\treturn Configuration{}\n\t}\n}\n"
  },
  {
    "path": "raft_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"encoding/json\"\n\t\"errors\"\n\t\"fmt\"\n\t\"os\"\n\t\"path/filepath\"\n\t\"reflect\"\n\t\"strings\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\t\"github.com/stretchr/testify/assert\"\n\t\"github.com/stretchr/testify/require\"\n)\n\nfunc TestRaft_StartStop(t *testing.T) {\n\tc := MakeCluster(1, t, nil)\n\tc.Close()\n}\n\nfunc TestRaft_AfterShutdown(t *testing.T) {\n\tc := MakeCluster(1, t, nil)\n\tc.Close()\n\traft := c.rafts[0]\n\n\t// Everything should fail now\n\tif f := raft.Apply(nil, 0); f.Error() != ErrRaftShutdown {\n\t\tt.Fatalf(\"should be shutdown: %v\", f.Error())\n\t}\n\n\t// TODO (slackpad) - Barrier, VerifyLeader, and GetConfiguration can get\n\t// stuck if the buffered channel consumes the future but things are shut\n\t// down so they never get processed.\n\tif f := raft.AddVoter(ServerID(\"id\"), ServerAddress(\"addr\"), 0, 0); f.Error() != ErrRaftShutdown {\n\t\tt.Fatalf(\"should be shutdown: %v\", f.Error())\n\t}\n\tif f := raft.AddNonvoter(ServerID(\"id\"), ServerAddress(\"addr\"), 0, 0); f.Error() != ErrRaftShutdown {\n\t\tt.Fatalf(\"should be shutdown: %v\", f.Error())\n\t}\n\tif f := raft.RemoveServer(ServerID(\"id\"), 0, 0); f.Error() != ErrRaftShutdown {\n\t\tt.Fatalf(\"should be shutdown: %v\", f.Error())\n\t}\n\tif f := raft.DemoteVoter(ServerID(\"id\"), 0, 0); f.Error() != ErrRaftShutdown {\n\t\tt.Fatalf(\"should be shutdown: %v\", f.Error())\n\t}\n\tif f := raft.Snapshot(); f.Error() != ErrRaftShutdown {\n\t\tt.Fatalf(\"should be shutdown: %v\", f.Error())\n\t}\n\n\t// Should be idempotent\n\tif f := raft.Shutdown(); f.Error() != nil {\n\t\tt.Fatalf(\"shutdown should be idempotent\")\n\t}\n}\n\nfunc TestRaft_LiveBootstrap(t *testing.T) {\n\t// Make the cluster.\n\tc := MakeClusterNoBootstrap(3, t, nil)\n\tdefer c.Close()\n\n\t// Build the 
configuration.\n\tconfiguration := Configuration{}\n\tfor _, r := range c.rafts {\n\t\tserver := Server{\n\t\t\tID:      r.localID,\n\t\t\tAddress: r.localAddr,\n\t\t}\n\t\tconfiguration.Servers = append(configuration.Servers, server)\n\t}\n\n\t// Bootstrap one of the nodes live.\n\tboot := c.rafts[0].BootstrapCluster(configuration)\n\tif err := boot.Error(); err != nil {\n\t\tt.Fatalf(\"bootstrap err: %v\", err)\n\t}\n\n\t// Should be one leader.\n\tc.Followers()\n\tleader := c.Leader()\n\tc.EnsureLeader(t, leader.localAddr)\n\n\t// Should be able to apply.\n\tfuture := leader.Apply([]byte(\"test\"), c.conf.CommitTimeout)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"apply err: %v\", err)\n\t}\n\tc.WaitForReplication(1)\n\n\t// Make sure the live bootstrap fails now that things are started up.\n\tboot = c.rafts[0].BootstrapCluster(configuration)\n\tif err := boot.Error(); err != ErrCantBootstrap {\n\t\tt.Fatalf(\"bootstrap should have failed: %v\", err)\n\t}\n}\n\nfunc TestRaft_LiveBootstrap_From_NonVoter(t *testing.T) {\n\t// Make the cluster.\n\tc := MakeClusterNoBootstrap(2, t, nil)\n\tdefer c.Close()\n\n\t// Build the configuration.\n\tconfiguration := Configuration{}\n\tfor i, r := range c.rafts {\n\t\tserver := Server{\n\t\t\tID:      r.localID,\n\t\t\tAddress: r.localAddr,\n\t\t}\n\t\tif i == 0 {\n\t\t\tserver.Suffrage = Nonvoter\n\t\t}\n\t\tconfiguration.Servers = append(configuration.Servers, server)\n\t}\n\n\t// Bootstrap one of the nodes live (the non-voter).\n\tboot := c.rafts[0].BootstrapCluster(configuration)\n\tif err := boot.Error(); err != ErrNotVoter {\n\t\tt.Fatalf(\"bootstrap should have failed: %v\", err)\n\t}\n}\n\nfunc TestRaft_RecoverCluster_NoState(t *testing.T) {\n\tc := MakeClusterNoBootstrap(1, t, nil)\n\tdefer c.Close()\n\n\tr := c.rafts[0]\n\tconfiguration := Configuration{\n\t\tServers: []Server{\n\t\t\t{\n\t\t\t\tID:      r.localID,\n\t\t\t\tAddress: r.localAddr,\n\t\t\t},\n\t\t},\n\t}\n\tcfg := r.config()\n\terr := 
RecoverCluster(&cfg, &MockFSM{}, r.logs, r.stable,\n\t\tr.snapshots, r.trans, configuration)\n\tif err == nil || !strings.Contains(err.Error(), \"no initial state\") {\n\t\tt.Fatalf(\"should have failed for no initial state: %v\", err)\n\t}\n}\n\nfunc TestRaft_RecoverCluster(t *testing.T) {\n\tsnapshotThreshold := 5\n\trunRecover := func(t *testing.T, applies int) {\n\t\tvar err error\n\t\tconf := inmemConfig(t)\n\t\tconf.TrailingLogs = 10\n\t\tconf.SnapshotThreshold = uint64(snapshotThreshold)\n\t\tc := MakeCluster(3, t, conf)\n\t\tdefer c.Close()\n\n\t\t// Perform some commits.\n\t\tc.logger.Debug(\"running with\", \"applies\", applies)\n\t\tleader := c.Leader()\n\t\tfor i := 0; i < applies; i++ {\n\t\t\tfuture := leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t\t\tif err = future.Error(); err != nil {\n\t\t\t\tt.Fatalf(\"[ERR] apply err: %v\", err)\n\t\t\t}\n\t\t}\n\n\t\t// Snap the configuration.\n\t\tfuture := leader.GetConfiguration()\n\t\tif err = future.Error(); err != nil {\n\t\t\tt.Fatalf(\"[ERR] get configuration err: %v\", err)\n\t\t}\n\t\tconfiguration := future.Configuration()\n\n\t\t// Shut down the cluster.\n\t\tfor _, sec := range c.rafts {\n\t\t\tif err = sec.Shutdown().Error(); err != nil {\n\t\t\t\tt.Fatalf(\"[ERR] shutdown err: %v\", err)\n\t\t\t}\n\t\t}\n\n\t\t// Recover the cluster. 
We need to replace the transport and we\n\t\t// replace the FSM so no state can carry over.\n\t\tfor i, r := range c.rafts {\n\t\t\tvar before []*SnapshotMeta\n\t\t\tbefore, err = r.snapshots.List()\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"snapshot list err: %v\", err)\n\t\t\t}\n\t\t\tcfg := r.config()\n\t\t\tif err = RecoverCluster(&cfg, &MockFSM{}, r.logs, r.stable,\n\t\t\t\tr.snapshots, r.trans, configuration); err != nil {\n\t\t\t\tt.Fatalf(\"recover err: %v\", err)\n\t\t\t}\n\n\t\t\t// Make sure the recovery looks right.\n\t\t\tvar after []*SnapshotMeta\n\t\t\tafter, err = r.snapshots.List()\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"snapshot list err: %v\", err)\n\t\t\t}\n\t\t\tif len(after) != len(before)+1 {\n\t\t\t\tt.Fatalf(\"expected a new snapshot, %d vs. %d\", len(before), len(after))\n\t\t\t}\n\t\t\tvar first uint64\n\t\t\tfirst, err = r.logs.FirstIndex()\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"first log index err: %v\", err)\n\t\t\t}\n\t\t\tvar last uint64\n\t\t\tlast, err = r.logs.LastIndex()\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"last log index err: %v\", err)\n\t\t\t}\n\t\t\tif first != 0 || last != 0 {\n\t\t\t\tt.Fatalf(\"expected empty logs, got %d/%d\", first, last)\n\t\t\t}\n\n\t\t\t// Fire up the recovered Raft instance. 
We have to patch\n\t\t\t// up the cluster state manually since this is an unusual\n\t\t\t// operation.\n\t\t\t_, trans := NewInmemTransport(r.localAddr)\n\t\t\tvar r2 *Raft\n\t\t\tr2, err = NewRaft(&cfg, &MockFSM{}, r.logs, r.stable, r.snapshots, trans)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"new raft err: %v\", err)\n\t\t\t}\n\t\t\tc.rafts[i] = r2\n\t\t\tc.trans[i] = r2.trans.(*InmemTransport)\n\t\t\tc.fsms[i] = r2.fsm.(*MockFSM)\n\t\t}\n\t\tc.FullyConnect()\n\t\ttime.Sleep(c.propagateTimeout * 3)\n\n\t\t// Let things settle and make sure we recovered.\n\t\tc.EnsureLeader(t, c.Leader().localAddr)\n\t\tc.EnsureSame(t)\n\t\tc.EnsureSamePeers(t)\n\t}\n\n\tt.Run(\"no snapshot, no trailing logs\", func(t *testing.T) {\n\t\trunRecover(t, 0)\n\t})\n\tt.Run(\"no snapshot, some trailing logs\", func(t *testing.T) {\n\t\trunRecover(t, snapshotThreshold-1)\n\t})\n\tt.Run(\"snapshot, with trailing logs\", func(t *testing.T) {\n\t\trunRecover(t, snapshotThreshold+20)\n\t})\n}\n\nfunc TestRaft_HasExistingState(t *testing.T) {\n\tvar err error\n\t// Make a cluster.\n\tc := MakeCluster(2, t, nil)\n\tdefer c.Close()\n\n\t// Make a new cluster of 1.\n\tc1 := MakeClusterNoBootstrap(1, t, nil)\n\n\t// Make sure the initial state is clean.\n\tvar hasState bool\n\thasState, err = HasExistingState(c1.rafts[0].logs, c1.rafts[0].stable, c1.rafts[0].snapshots)\n\tif err != nil || hasState {\n\t\tt.Fatalf(\"should not have any existing state, %v\", err)\n\t}\n\n\t// Merge clusters.\n\tc.Merge(c1)\n\tc.FullyConnect()\n\n\t// Join the new node in.\n\tfuture := c.Leader().AddVoter(c1.rafts[0].localID, c1.rafts[0].localAddr, 0, 0)\n\tif err = future.Error(); err != nil {\n\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t}\n\n\t// Check the FSMs.\n\tc.EnsureSame(t)\n\n\t// Check the peers.\n\tc.EnsureSamePeers(t)\n\n\t// Ensure one leader.\n\tc.EnsureLeader(t, c.Leader().localAddr)\n\n\t// Make sure it's not clean.\n\thasState, err = HasExistingState(c1.rafts[0].logs, c1.rafts[0].stable, 
c1.rafts[0].snapshots)\n\tif err != nil || !hasState {\n\t\tt.Fatalf(\"should have some existing state, %v\", err)\n\t}\n}\n\nfunc TestRaft_SingleNode(t *testing.T) {\n\tconf := inmemConfig(t)\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\traft := c.rafts[0]\n\n\t// Watch leaderCh for change\n\tselect {\n\tcase v := <-raft.LeaderCh():\n\t\tif !v {\n\t\t\tt.Fatalf(\"should become leader\")\n\t\t}\n\tcase <-time.After(conf.HeartbeatTimeout * 3):\n\t\tt.Fatalf(\"timeout becoming leader\")\n\t}\n\n\t// Should be leader\n\tif s := raft.State(); s != Leader {\n\t\tt.Fatalf(\"expected leader: %v\", s)\n\t}\n\n\t// Should be able to apply\n\tfuture := raft.Apply([]byte(\"test\"), c.conf.HeartbeatTimeout)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Check the response\n\tif future.Response().(int) != 1 {\n\t\tt.Fatalf(\"bad response: %v\", future.Response())\n\t}\n\n\t// Check the index\n\tif idx := future.Index(); idx == 0 {\n\t\tt.Fatalf(\"bad index: %d\", idx)\n\t}\n\n\t// Check that it is applied to the FSM\n\tif len(getMockFSM(c.fsms[0]).logs) != 1 {\n\t\tt.Fatalf(\"did not apply to FSM!\")\n\t}\n}\n\nfunc TestRaft_TripleNode(t *testing.T) {\n\t// Make the cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Should be one leader\n\tc.Followers()\n\tleader := c.Leader()\n\tc.EnsureLeader(t, leader.localAddr)\n\n\t// Should be able to apply\n\tfuture := leader.Apply([]byte(\"test\"), c.conf.CommitTimeout)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tc.WaitForReplication(1)\n}\n\nfunc TestRaft_LeaderFail(t *testing.T) {\n\t// Make the cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Should be one leader\n\tc.Followers()\n\tleader := c.Leader()\n\n\t// Should be able to apply\n\tfuture := leader.Apply([]byte(\"test\"), c.conf.CommitTimeout)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tc.WaitForReplication(1)\n\n\t// 
Disconnect the leader now\n\tt.Logf(\"[INFO] Disconnecting %v\", leader)\n\tleaderTerm := leader.getCurrentTerm()\n\tc.Disconnect(leader.localAddr)\n\n\t// Wait for new leader\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar newLead *Raft\n\tfor time.Now().Before(limit) && newLead == nil {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tleaders := c.GetInState(Leader)\n\t\tif len(leaders) == 1 && leaders[0] != leader {\n\t\t\tnewLead = leaders[0]\n\t\t}\n\t}\n\tif newLead == nil {\n\t\tt.Fatalf(\"expected new leader\")\n\t}\n\n\t// Ensure the term is greater\n\tif newLead.getCurrentTerm() <= leaderTerm {\n\t\tt.Fatalf(\"expected newer term! %d %d (%v, %v)\", newLead.getCurrentTerm(), leaderTerm, newLead, leader)\n\t}\n\n\t// Apply should work not work on old leader\n\tfuture1 := leader.Apply([]byte(\"fail\"), c.conf.CommitTimeout)\n\n\t// Apply should work on newer leader\n\tfuture2 := newLead.Apply([]byte(\"apply\"), c.conf.CommitTimeout)\n\n\t// Future2 should work\n\tif err := future2.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Reconnect the networks\n\tt.Logf(\"[INFO] Reconnecting %v\", leader)\n\tc.FullyConnect()\n\n\t// Future1 should fail\n\tif err := future1.Error(); err != ErrLeadershipLost && err != ErrNotLeader {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Wait for log replication\n\tc.EnsureSame(t)\n\n\t// Check two entries are applied to the FSM\n\tfor _, fsmRaw := range c.fsms {\n\t\tfsm := getMockFSM(fsmRaw)\n\t\tfsm.Lock()\n\t\tif len(fsm.logs) != 2 {\n\t\t\tt.Fatalf(\"did not apply both to FSM! 
%v\", fsm.logs)\n\t\t}\n\n\t\trequire.Equal(t, fsm.logs[0], []byte(\"test\"))\n\t\trequire.Equal(t, fsm.logs[1], []byte(\"apply\"))\n\t\tfsm.Unlock()\n\t}\n}\n\nfunc TestRaft_BehindFollower(t *testing.T) {\n\t// Make the cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Disconnect one follower\n\tleader := c.Leader()\n\tfollowers := c.Followers()\n\tbehind := followers[0]\n\tc.Disconnect(behind.localAddr)\n\n\t// Commit a lot of things\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t} else {\n\t\tt.Logf(\"[INFO] Finished apply without behind follower\")\n\t}\n\n\t// Check that we have a non zero last contact\n\tif behind.LastContact().IsZero() {\n\t\tt.Fatalf(\"expected previous contact\")\n\t}\n\n\t// Reconnect the behind node\n\tc.FullyConnect()\n\n\t// Ensure all the logs are the same\n\tc.EnsureSame(t)\n\n\t// Ensure one leader\n\tleader = c.Leader()\n\tc.EnsureLeader(t, leader.localAddr)\n}\n\nfunc TestRaft_ApplyNonLeader(t *testing.T) {\n\t// Make the cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Wait for a leader\n\tc.Leader()\n\n\t// Try to apply to them\n\tfollowers := c.GetInState(Follower)\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"Expected 2 followers\")\n\t}\n\tfollower := followers[0]\n\n\t// Try to apply\n\tfuture := follower.Apply([]byte(\"test\"), c.conf.CommitTimeout)\n\tif future.Error() != ErrNotLeader {\n\t\tt.Fatalf(\"should not apply on follower\")\n\t}\n\n\t// Should be cached\n\tif future.Error() != ErrNotLeader {\n\t\tt.Fatalf(\"should not apply on follower\")\n\t}\n}\n\nfunc TestRaft_ApplyConcurrent(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.HeartbeatTimeout = 2 * conf.HeartbeatTimeout\n\tconf.ElectionTimeout = 2 * conf.ElectionTimeout\n\tc := MakeCluster(3, t, conf)\n\tdefer 
c.Close()\n\n\t// Wait for a leader\n\tleader := c.Leader()\n\n\t// Create a wait group\n\tconst sz = 100\n\tvar group sync.WaitGroup\n\tgroup.Add(sz)\n\n\tapplyF := func(i int) {\n\t\tdefer group.Done()\n\t\tfuture := leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t\tif err := future.Error(); err != nil {\n\t\t\tc.Failf(\"[ERR] err: %v\", err)\n\t\t}\n\t}\n\n\t// Concurrently apply\n\tfor i := 0; i < sz; i++ {\n\t\tgo applyF(i)\n\t}\n\n\t// Wait to finish\n\tdoneCh := make(chan struct{})\n\tgo func() {\n\t\tgroup.Wait()\n\t\tclose(doneCh)\n\t}()\n\tselect {\n\tcase <-doneCh:\n\tcase <-time.After(c.longstopTimeout):\n\t\tt.Fatalf(\"timeout\")\n\t}\n\n\t// If anything failed up to this point then bail now, rather than do a\n\t// confusing compare.\n\tif t.Failed() {\n\t\tt.Fatalf(\"One or more of the apply operations failed\")\n\t}\n\n\t// Check the FSMs\n\tc.EnsureSame(t)\n}\n\nfunc TestRaft_ApplyConcurrent_Timeout(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.CommitTimeout = 1 * time.Millisecond\n\tconf.HeartbeatTimeout = 2 * conf.HeartbeatTimeout\n\tconf.ElectionTimeout = 2 * conf.ElectionTimeout\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\n\t// Wait for a leader\n\tleader := c.Leader()\n\n\t// Enough enqueues should cause at least one timeout...\n\tvar didTimeout int32\n\tfor i := 0; (i < 5000) && (atomic.LoadInt32(&didTimeout) == 0); i++ {\n\t\tgo func(i int) {\n\t\t\tfuture := leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), time.Microsecond)\n\t\t\tif future.Error() == ErrEnqueueTimeout {\n\t\t\t\tatomic.StoreInt32(&didTimeout, 1)\n\t\t\t}\n\t\t}(i)\n\n\t\t// Give the leader loop some other things to do in order to\n\t\t// increase the odds of a timeout.\n\t\tif i%5 == 0 {\n\t\t\tleader.VerifyLeader()\n\t\t}\n\t}\n\n\t// Loop until we see a timeout, or give up.\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tfor time.Now().Before(limit) {\n\t\tif atomic.LoadInt32(&didTimeout) != 0 
{\n\t\t\treturn\n\t\t}\n\t\tc.WaitEvent(nil, c.propagateTimeout)\n\t}\n\tt.Fatalf(\"Timeout waiting to detect apply timeouts\")\n}\n\nfunc TestRaft_JoinNode(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(2, t, nil)\n\tdefer c.Close()\n\n\t// Make a new cluster of 1\n\tc1 := MakeClusterNoBootstrap(1, t, nil)\n\n\t// Merge clusters\n\tc.Merge(c1)\n\tc.FullyConnect()\n\n\t// Join the new node in\n\tfuture := c.Leader().AddVoter(c1.rafts[0].localID, c1.rafts[0].localAddr, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure one leader\n\tc.EnsureLeader(t, c.Leader().localAddr)\n\n\t// Check the FSMs\n\tc.EnsureSame(t)\n\n\t// Check the peers\n\tc.EnsureSamePeers(t)\n}\n\nfunc TestRaft_JoinNode_ConfigStore(t *testing.T) {\n\t// Make a cluster\n\tconf := inmemConfig(t)\n\tc := makeCluster(t, &MakeClusterOpts{\n\t\tPeers:          1,\n\t\tBootstrap:      true,\n\t\tConf:           conf,\n\t\tConfigStoreFSM: true,\n\t})\n\tdefer c.Close()\n\n\t// Make a new nodes\n\tc1 := makeCluster(t, &MakeClusterOpts{\n\t\tPeers:          1,\n\t\tBootstrap:      false,\n\t\tConf:           conf,\n\t\tConfigStoreFSM: true,\n\t})\n\tc2 := makeCluster(t, &MakeClusterOpts{\n\t\tPeers:          1,\n\t\tBootstrap:      false,\n\t\tConf:           conf,\n\t\tConfigStoreFSM: true,\n\t})\n\n\t// Merge clusters\n\tc.Merge(c1)\n\tc.Merge(c2)\n\tc.FullyConnect()\n\n\t// Join the new node in\n\tfuture := c.Leader().AddVoter(c1.rafts[0].localID, c1.rafts[0].localAddr, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\t// Join the new node in\n\tfuture = c.Leader().AddVoter(c2.rafts[0].localID, c2.rafts[0].localAddr, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure one leader\n\tc.EnsureLeader(t, c.Leader().localAddr)\n\n\t// Check the FSMs\n\tc.EnsureSame(t)\n\n\t// Check the peers\n\tc.EnsureSamePeers(t)\n\n\t// Check the fsm holds the correct config 
logs\n\tfor _, fsmRaw := range c.fsms {\n\t\tfsm := getMockFSM(fsmRaw)\n\t\tif len(fsm.configurations) != 3 {\n\t\t\tt.Fatalf(\"unexpected number of configuration changes: %d\", len(fsm.configurations))\n\t\t}\n\t\tif len(fsm.configurations[0].Servers) != 1 {\n\t\t\tt.Fatalf(\"unexpected number of servers in config change: %v\", fsm.configurations[0].Servers)\n\t\t}\n\t\tif len(fsm.configurations[1].Servers) != 2 {\n\t\t\tt.Fatalf(\"unexpected number of servers in config change: %v\", fsm.configurations[1].Servers)\n\t\t}\n\t\tif len(fsm.configurations[2].Servers) != 3 {\n\t\t\tt.Fatalf(\"unexpected number of servers in config change: %v\", fsm.configurations[2].Servers)\n\t\t}\n\t}\n}\n\nfunc TestRaft_RemoveFollower(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Wait until we have 2 followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// Remove a follower\n\tfollower := followers[0]\n\tfuture := leader.RemoveServer(follower.localID, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Wait a while\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Other nodes should have fewer peers\n\tif configuration := c.getConfiguration(leader); len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers\")\n\t}\n\tif configuration := c.getConfiguration(followers[1]); len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers\")\n\t}\n\n\t// The removed node should remain in a follower state\n\trequire.Equal(t, Follower, follower.getState())\n}\n\nfunc TestRaft_RemoveLeader(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Get the 
leader\n\tleader := c.Leader()\n\n\t// Wait until we have 2 followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// Remove the leader\n\tf := leader.RemoveServer(leader.localID, 0, 0)\n\n\t// Wait for the future to complete\n\tif f.Error() != nil {\n\t\tt.Fatalf(\"RemoveServer() returned error %v\", f.Error())\n\t}\n\n\t// Wait a bit for log application\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Should have a new leader\n\ttime.Sleep(c.propagateTimeout)\n\tnewLeader := c.Leader()\n\tif newLeader == leader {\n\t\tt.Fatalf(\"removed leader is still leader\")\n\t}\n\n\t// Other nodes should have fewer peers\n\tif configuration := c.getConfiguration(newLeader); len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"wrong number of peers %d\", len(configuration.Servers))\n\t}\n\n\t// Old leader should be shutdown\n\tif leader.State() != Shutdown {\n\t\tt.Fatalf(\"old leader should be shutdown\")\n\t}\n}\n\nfunc TestRaft_RemoveLeader_NoShutdown(t *testing.T) {\n\t// Make a cluster\n\tconf := inmemConfig(t)\n\tconf.ShutdownOnRemove = false\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\n\t// Get the leader\n\tc.Followers()\n\tleader := c.Leader()\n\n\t// Remove the leader\n\tfor i := byte(0); i < 100; i++ {\n\t\tif i == 80 {\n\t\t\tremoveFuture := leader.RemoveServer(leader.localID, 0, 0)\n\t\t\tif err := removeFuture.Error(); err != nil {\n\t\t\t\tt.Fatalf(\"err: %v, remove leader failed\", err)\n\t\t\t}\n\t\t}\n\t\tfuture := leader.Apply([]byte{i}, 0)\n\t\tif i > 80 {\n\t\t\tif err := future.Error(); err == nil || err != ErrNotLeader {\n\t\t\t\tt.Fatalf(\"err: %v, future entries should fail\", err)\n\t\t\t}\n\t\t}\n\t}\n\n\t// Wait a while\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Should have a new 
leader\n\tnewLeader := c.Leader()\n\n\t// Wait a bit for log application\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Other nodes should have pulled the leader.\n\tconfiguration := c.getConfiguration(newLeader)\n\tif len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers\")\n\t}\n\tif hasVote(configuration, leader.localID) {\n\t\tt.Fatalf(\"old leader should no longer have a vote\")\n\t}\n\n\t// Old leader should be a follower.\n\tif leader.State() != Follower {\n\t\tt.Fatalf(\"leader should be follower\")\n\t}\n\n\t// Old leader should not include itself in its peers.\n\tconfiguration = c.getConfiguration(leader)\n\tif len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers\")\n\t}\n\tif hasVote(configuration, leader.localID) {\n\t\tt.Fatalf(\"old leader should no longer have a vote\")\n\t}\n\n\t// Other nodes should have the same state\n\tc.EnsureSame(t)\n}\n\nfunc TestRaft_RemoveFollower_SplitCluster(t *testing.T) {\n\t// Make a cluster.\n\tconf := inmemConfig(t)\n\tc := MakeCluster(4, t, conf)\n\tdefer c.Close()\n\n\t// Wait for a leader to get elected.\n\tleader := c.Leader()\n\n\t// Wait to make sure knowledge of the 4th server is known to all the\n\t// peers.\n\tnumServers := 0\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tfor time.Now().Before(limit) && numServers != 4 {\n\t\ttime.Sleep(c.propagateTimeout)\n\t\tconfiguration := c.getConfiguration(leader)\n\t\tnumServers = len(configuration.Servers)\n\t}\n\tif numServers != 4 {\n\t\tt.Fatalf(\"Leader should have 4 servers, got %d\", numServers)\n\t}\n\tc.EnsureSamePeers(t)\n\n\t// Isolate two of the followers.\n\tfollowers := c.Followers()\n\tif len(followers) != 3 {\n\t\tt.Fatalf(\"Expected 3 followers, got %d\", len(followers))\n\t}\n\tc.Partition([]ServerAddress{followers[0].localAddr, followers[1].localAddr})\n\n\t// Try to remove the remaining follower that was left with the leader.\n\tfuture := leader.RemoveServer(followers[2].localID, 0, 0)\n\tif err := future.Error(); err == nil 
{\n\t\tt.Fatalf(\"Should not have been able to make peer change\")\n\t}\n}\n\nfunc TestRaft_AddKnownPeer(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\tfollowers := c.GetInState(Follower)\n\n\tconfigReq := &configurationsFuture{}\n\tconfigReq.init()\n\tleader.configurationsCh <- configReq\n\tif err := configReq.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tstartingConfig := configReq.configurations.committed\n\tstartingConfigIdx := configReq.configurations.committedIndex\n\n\t// Add a follower\n\tfuture := leader.AddVoter(followers[0].localID, followers[0].localAddr, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"AddVoter() err: %v\", err)\n\t}\n\tconfigReq = &configurationsFuture{}\n\tconfigReq.init()\n\tleader.configurationsCh <- configReq\n\tif err := configReq.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tnewConfig := configReq.configurations.committed\n\tnewConfigIdx := configReq.configurations.committedIndex\n\tif newConfigIdx <= startingConfigIdx {\n\t\tt.Fatalf(\"AddVoter should have written a new config entry, but configurations.committedIndex still %d\", newConfigIdx)\n\t}\n\tif !reflect.DeepEqual(newConfig, startingConfig) {\n\t\tt.Fatalf(\"[ERR} AddVoter with existing peer shouldn't have changed config, was %#v, but now %#v\", startingConfig, newConfig)\n\t}\n}\n\nfunc TestRaft_RemoveUnknownPeer(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\tconfigReq := &configurationsFuture{}\n\tconfigReq.init()\n\tleader.configurationsCh <- configReq\n\tif err := configReq.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tstartingConfig := configReq.configurations.committed\n\tstartingConfigIdx := configReq.configurations.committedIndex\n\n\t// Remove unknown\n\tfuture := leader.RemoveServer(ServerID(NewInmemAddr()), 0, 0)\n\n\t// 
nothing to do, should be a new config entry that's the same as before\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"RemoveServer() err: %v\", err)\n\t}\n\tconfigReq = &configurationsFuture{}\n\tconfigReq.init()\n\tleader.configurationsCh <- configReq\n\tif err := configReq.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tnewConfig := configReq.configurations.committed\n\tnewConfigIdx := configReq.configurations.committedIndex\n\tif newConfigIdx <= startingConfigIdx {\n\t\tt.Fatalf(\"RemoveServer should have written a new config entry, but configurations.committedIndex still %d\", newConfigIdx)\n\t}\n\tif !reflect.DeepEqual(newConfig, startingConfig) {\n\t\tt.Fatalf(\"[ERR} RemoveServer with unknown peer shouldn't of changed config, was %#v, but now %#v\", startingConfig, newConfig)\n\t}\n}\n\nfunc TestRaft_SnapshotRestore(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.TrailingLogs = 10\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\n\t// Commit a lot of things\n\tleader := c.Leader()\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Take a snapshot\n\tsnapFuture := leader.Snapshot()\n\tif err := snapFuture.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Check for snapshot\n\tsnaps, _ := leader.snapshots.List()\n\tif len(snaps) != 1 {\n\t\tt.Fatalf(\"should have a snapshot\")\n\t}\n\tsnap := snaps[0]\n\n\t// Logs should be trimmed\n\tif idx, _ := leader.logs.FirstIndex(); idx != snap.Index-conf.TrailingLogs+1 {\n\t\tt.Fatalf(\"should trim logs to %d: but is %d\", snap.Index-conf.TrailingLogs+1, idx)\n\t}\n\n\t// Shutdown\n\tshutdown := leader.Shutdown()\n\tif err := shutdown.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Restart the Raft\n\tr := leader\n\t// Can't just reuse 
the old transport as it will be closed\n\t_, trans2 := NewInmemTransport(r.trans.LocalAddr())\n\tcfg := r.config()\n\tr, err := NewRaft(&cfg, r.fsm, r.logs, r.stable, r.snapshots, trans2)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tc.rafts[0] = r\n\n\t// We should have restored from the snapshot!\n\tif last := r.getLastApplied(); last != snap.Index {\n\t\tt.Fatalf(\"bad last index: %d, expecting %d\", last, snap.Index)\n\t}\n}\n\nfunc TestRaft_RestoreSnapshotOnStartup_Monotonic(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.TrailingLogs = 10\n\topts := &MakeClusterOpts{\n\t\tPeers:         1,\n\t\tBootstrap:     true,\n\t\tConf:          conf,\n\t\tMonotonicLogs: true,\n\t}\n\tc := MakeClusterCustom(t, opts)\n\tdefer c.Close()\n\n\tleader := c.Leader()\n\n\t// Commit a lot of things\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Take a snapshot\n\tsnapFuture := leader.Snapshot()\n\tif err := snapFuture.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Check for snapshot\n\tsnaps, _ := leader.snapshots.List()\n\tif len(snaps) != 1 {\n\t\tt.Fatalf(\"should have a snapshot\")\n\t}\n\tsnap := snaps[0]\n\n\t// Logs should be trimmed\n\tfirstIdx, err := leader.logs.FirstIndex()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tlastIdx, err := leader.logs.LastIndex()\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\tif firstIdx != snap.Index-conf.TrailingLogs+1 {\n\t\tt.Fatalf(\"should trim logs to %d: but is %d\", snap.Index-conf.TrailingLogs+1, firstIdx)\n\t}\n\n\t// Shutdown\n\tshutdown := leader.Shutdown()\n\tif err := shutdown.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Restart the Raft\n\tr := leader\n\t// Can't just reuse the old transport as it will be closed\n\t_, 
trans2 := NewInmemTransport(r.trans.LocalAddr())\n\tcfg := r.config()\n\tr, err = NewRaft(&cfg, r.fsm, r.logs, r.stable, r.snapshots, trans2)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tc.rafts[0] = r\n\n\t// We should have restored from the snapshot!\n\tif last := r.getLastApplied(); last != snap.Index {\n\t\tt.Fatalf(\"bad last index: %d, expecting %d\", last, snap.Index)\n\t}\n\n\t// Verify that logs have not been reset\n\tfirst, _ := r.logs.FirstIndex()\n\tlast, _ := r.logs.LastIndex()\n\tassert.Equal(t, firstIdx, first)\n\tassert.Equal(t, lastIdx, last)\n}\n\nfunc TestRaft_SnapshotRestore_Progress(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.TrailingLogs = 10\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\n\t// Commit a lot of things\n\tleader := c.Leader()\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Take a snapshot\n\tsnapFuture := leader.Snapshot()\n\tif err := snapFuture.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Check for snapshot\n\tsnaps, _ := leader.snapshots.List()\n\tif len(snaps) != 1 {\n\t\tt.Fatalf(\"should have a snapshot\")\n\t}\n\tsnap := snaps[0]\n\n\t// Logs should be trimmed\n\tif idx, _ := leader.logs.FirstIndex(); idx != snap.Index-conf.TrailingLogs+1 {\n\t\tt.Fatalf(\"should trim logs to %d: but is %d\", snap.Index-conf.TrailingLogs+1, idx)\n\t}\n\n\t// Shutdown\n\tshutdown := leader.Shutdown()\n\tif err := shutdown.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Restart the Raft\n\tr := leader\n\t// Can't just reuse the old transport as it will be closed\n\t_, trans2 := NewInmemTransport(r.trans.LocalAddr())\n\tcfg := r.config()\n\n\t// Intercept logs and look for specific log messages.\n\tvar logbuf lockedBytesBuffer\n\tcfg.Logger = 
hclog.New(&hclog.LoggerOptions{\n\t\tName:       \"test\",\n\t\tJSONFormat: true,\n\t\tLevel:      hclog.Info,\n\t\tOutput:     &logbuf,\n\t})\n\tr, err := NewRaft(&cfg, r.fsm, r.logs, r.stable, r.snapshots, trans2)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tc.rafts[0] = r\n\n\t// We should have restored from the snapshot!\n\tif last := r.getLastApplied(); last != snap.Index {\n\t\tt.Fatalf(\"bad last index: %d, expecting %d\", last, snap.Index)\n\t}\n\n\t{\n\t\tdec := json.NewDecoder(strings.NewReader(logbuf.String()))\n\n\t\tfound := false\n\n\t\ttype partialRecord struct {\n\t\t\tMessage         string `json:\"@message\"`\n\t\t\tPercentComplete string `json:\"percent-complete\"`\n\t\t}\n\n\t\tfor !found {\n\t\t\tvar record partialRecord\n\t\t\tif err := dec.Decode(&record); err != nil {\n\t\t\t\tt.Fatalf(\"error while decoding json logs: %v\", err)\n\t\t\t}\n\n\t\t\tif record.Message == \"snapshot restore progress\" && record.PercentComplete == \"100.00%\" {\n\t\t\t\tfound = true\n\t\t\t\tbreak\n\t\t\t}\n\n\t\t}\n\t\tif !found {\n\t\t\tt.Fatalf(\"could not find a log line indicating that snapshot restore progress was being logged\")\n\t\t}\n\t}\n}\n\ntype lockedBytesBuffer struct {\n\tmu  sync.Mutex\n\tbuf bytes.Buffer\n}\n\nfunc (b *lockedBytesBuffer) Write(p []byte) (n int, err error) {\n\tb.mu.Lock()\n\tdefer b.mu.Unlock()\n\treturn b.buf.Write(p)\n}\n\nfunc (b *lockedBytesBuffer) String() string {\n\tb.mu.Lock()\n\tdefer b.mu.Unlock()\n\treturn b.buf.String()\n}\n\n// TODO: Need a test that has a previous format Snapshot and check that it can\n// be read/installed on the new code.\n\n// TODO: Need a test to process old-style entries in the Raft log when starting\n// up.\n\nfunc TestRaft_NoRestoreOnStart(t *testing.T) {\n\tconf := inmemConfig(t)\n\tconf.TrailingLogs = 10\n\tconf.NoSnapshotRestoreOnStart = true\n\tc := MakeCluster(1, t, conf)\n\n\t// Commit a lot of things.\n\tleader := c.Leader()\n\tvar future Future\n\tfor i := 0; i < 100; 
i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Take a snapshot.\n\tsnapFuture := leader.Snapshot()\n\tif err := snapFuture.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Shutdown.\n\tshutdown := leader.Shutdown()\n\tif err := shutdown.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t_, trans := NewInmemTransport(leader.localAddr)\n\tnewFSM := &MockFSM{}\n\tcfg := leader.config()\n\t_, err := NewRaft(&cfg, newFSM, leader.logs, leader.stable, leader.snapshots, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\tif len(newFSM.logs) != 0 {\n\t\tt.Fatalf(\"expected empty FSM, got %v\", newFSM)\n\t}\n}\n\nfunc TestRaft_SnapshotRestore_PeerChange(t *testing.T) {\n\tvar err error\n\t// Make the cluster.\n\tconf := inmemConfig(t)\n\tconf.ProtocolVersion = 1\n\tconf.TrailingLogs = 10\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\n\t// Commit a lot of things.\n\tleader := c.Leader()\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err = future.Error(); err != nil {\n\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t}\n\n\t// Take a snapshot.\n\tsnapFuture := leader.Snapshot()\n\tif err = snapFuture.Error(); err != nil {\n\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t}\n\n\t// Shutdown.\n\tshutdown := leader.Shutdown()\n\tif err = shutdown.Error(); err != nil {\n\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t}\n\n\t// Make a separate cluster.\n\tc2 := MakeClusterNoBootstrap(2, t, conf)\n\tdefer c2.Close()\n\n\t// Kill the old cluster.\n\tfor _, sec := range c.rafts {\n\t\tif sec != leader {\n\t\t\tif err = sec.Shutdown().Error(); err != nil {\n\t\t\t\tt.Fatalf(\"[ERR] shutdown err: %v\", err)\n\t\t\t}\n\t\t}\n\t}\n\n\t// Restart the Raft with new peers.\n\tr := 
leader\n\n\t// Gather the new peer address list.\n\tvar peers []string\n\tpeers = append(peers, fmt.Sprintf(\"%q\", leader.trans.LocalAddr()))\n\tfor _, sec := range c2.rafts {\n\t\tpeers = append(peers, fmt.Sprintf(\"%q\", sec.trans.LocalAddr()))\n\t}\n\tcontent := []byte(fmt.Sprintf(\"[%s]\", strings.Join(peers, \",\")))\n\n\t// Perform a manual recovery on the cluster.\n\tbase, err := os.MkdirTemp(\"\", \"\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tdefer func() { _ = os.RemoveAll(base) }()\n\tpeersFile := filepath.Join(base, \"peers.json\")\n\tif err = os.WriteFile(peersFile, content, 0o666); err != nil {\n\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t}\n\tconfiguration, err := ReadPeersJSON(peersFile)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tcfg := r.config()\n\tif err = RecoverCluster(&cfg, &MockFSM{}, r.logs, r.stable,\n\t\tr.snapshots, r.trans, configuration); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Can't just reuse the old transport as it will be closed. We also start\n\t// with a fresh FSM for good measure so no state can carry over.\n\t_, trans := NewInmemTransport(r.localAddr)\n\tr, err = NewRaft(&cfg, &MockFSM{}, r.logs, r.stable, r.snapshots, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tc.rafts[0] = r\n\tc2.rafts = append(c2.rafts, r)\n\tc2.trans = append(c2.trans, r.trans.(*InmemTransport))\n\tc2.fsms = append(c2.fsms, r.fsm.(*MockFSM))\n\tc2.FullyConnect()\n\n\t// Wait a while.\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Ensure we elect a leader, and that we replicate to our new followers.\n\tc2.EnsureSame(t)\n\n\t// We should have restored from the snapshot! 
Note that there's one\n\t// index bump from the noop the leader tees up when it takes over.\n\tif last := r.getLastApplied(); last != 103 {\n\t\tt.Fatalf(\"bad last: %v\", last)\n\t}\n\n\t// Check the peers.\n\tc2.EnsureSamePeers(t)\n}\n\nfunc TestRaft_AutoSnapshot(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.SnapshotInterval = conf.CommitTimeout * 2\n\tconf.SnapshotThreshold = 50\n\tconf.TrailingLogs = 10\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\n\t// Commit a lot of things\n\tleader := c.Leader()\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Wait for a snapshot to happen\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Check for snapshot\n\tif snaps, _ := leader.snapshots.List(); len(snaps) == 0 {\n\t\tt.Fatalf(\"should have a snapshot\")\n\t}\n}\n\nfunc TestRaft_UserSnapshot(t *testing.T) {\n\t// Make the cluster.\n\tconf := inmemConfig(t)\n\tconf.SnapshotThreshold = 50\n\tconf.TrailingLogs = 10\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\n\t// With nothing committed, asking for a snapshot should return an error.\n\tleader := c.Leader()\n\tif userSnapshotErrorsOnNoData {\n\t\tif err := leader.Snapshot().Error(); err != ErrNothingNewToSnapshot {\n\t\t\tt.Fatalf(\"Request for Snapshot failed: %v\", err)\n\t\t}\n\t}\n\n\t// Commit some things.\n\tvar future Future\n\tfor i := 0; i < 10; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test %d\", i)), 0)\n\t}\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"Error Apply new log entries: %v\", err)\n\t}\n\n\t// Now we should be able to ask for a snapshot without getting an error.\n\tif err := leader.Snapshot().Error(); err != nil {\n\t\tt.Fatalf(\"Request for Snapshot failed: %v\", err)\n\t}\n\n\t// Check for snapshot\n\tif snaps, _ := leader.snapshots.List(); 
len(snaps) == 0 {\n\t\tt.Fatalf(\"should have a snapshot\")\n\t}\n}\n\n// snapshotAndRestore does a snapshot and restore sequence and applies the given\n// offset to the snapshot index, so we can try out different situations.\nfunc snapshotAndRestore(t *testing.T, offset uint64, monotonicLogStore bool, restoreNewCluster bool) {\n\tt.Helper()\n\n\t// Make the cluster.\n\tconf := inmemConfig(t)\n\n\t// snapshot operations perform some file IO operations.\n\t// increase times out to account for that\n\tconf.HeartbeatTimeout = 500 * time.Millisecond\n\tconf.ElectionTimeout = 500 * time.Millisecond\n\tconf.LeaderLeaseTimeout = 500 * time.Millisecond\n\n\tvar c *cluster\n\tnumPeers := 3\n\toptsMonotonic := &MakeClusterOpts{\n\t\tPeers:         numPeers,\n\t\tBootstrap:     true,\n\t\tConf:          conf,\n\t\tMonotonicLogs: true,\n\t}\n\tif monotonicLogStore {\n\t\tc = MakeClusterCustom(t, optsMonotonic)\n\t} else {\n\t\tc = MakeCluster(numPeers, t, conf)\n\t}\n\tdefer c.Close()\n\n\t// Wait for things to get stable and commit some things.\n\tleader := c.Leader()\n\tvar future Future\n\tfor i := 0; i < 10; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test %d\", i)), 0)\n\t}\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"Error Apply new log entries: %v\", err)\n\t}\n\n\t// Take a snapshot.\n\tsnap := leader.Snapshot()\n\tif err := snap.Error(); err != nil {\n\t\tt.Fatalf(\"Request for Snapshot failed: %v\", err)\n\t}\n\n\t// Commit some more things.\n\tfor i := 10; i < 20; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test %d\", i)), 0)\n\t}\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"Error Apply new log entries: %v\", err)\n\t}\n\n\t// Get the last index before the restore.\n\tpreIndex := leader.getLastIndex()\n\n\tif restoreNewCluster {\n\t\tvar c2 *cluster\n\t\tif monotonicLogStore {\n\t\t\tc2 = MakeClusterCustom(t, optsMonotonic)\n\t\t} else {\n\t\t\tc2 = MakeCluster(numPeers, t, conf)\n\t\t}\n\t\tc = c2\n\t\tleader = 
c.Leader()\n\t}\n\n\t// Restore the snapshot, twiddling the index with the offset.\n\tmeta, reader, err := snap.Open()\n\tmeta.Index += offset\n\tif err != nil {\n\t\tt.Fatalf(\"Snapshot open failed: %v\", err)\n\t}\n\tdefer func() { _ = reader.Close() }()\n\tif err := leader.Restore(meta, reader, 5*time.Second); err != nil {\n\t\tt.Fatalf(\"Restore failed: %v\", err)\n\t}\n\n\t// Make sure the index was updated correctly. We add 2 because we burn\n\t// an index to create a hole, and then we apply a no-op after the\n\t// restore.\n\tvar expected uint64\n\tif !restoreNewCluster && meta.Index < preIndex {\n\t\texpected = preIndex + 2\n\t} else {\n\t\t// restoring onto a new cluster should always have a last index based\n\t\t// off of the snapshot meta index\n\t\texpected = meta.Index + 2\n\t}\n\n\tlastIndex := leader.getLastIndex()\n\tif lastIndex != expected {\n\t\tt.Fatalf(\"Index was not updated correctly: %d vs. %d\", lastIndex, expected)\n\t}\n\n\t// Ensure raft logs are removed for monotonic log stores but remain\n\t// untouched for non-monotic (BoltDB) logstores.\n\t// When first index = 1, then logs have remained untouched.\n\t// When first index is set to the next commit index / last index, then\n\t// it means logs have been removed.\n\traftNodes := make([]*Raft, 0, numPeers+1)\n\traftNodes = append(raftNodes, leader)\n\traftNodes = append(raftNodes, c.Followers()...)\n\tfor _, raftNode := range raftNodes {\n\t\tfirstLogIndex, err := raftNode.logs.FirstIndex()\n\t\trequire.NoError(t, err)\n\t\tlastLogIndex, err := raftNode.logs.LastIndex()\n\t\trequire.NoError(t, err)\n\t\tif monotonicLogStore {\n\t\t\trequire.Equal(t, expected, firstLogIndex)\n\t\t} else {\n\t\t\trequire.Equal(t, uint64(1), firstLogIndex)\n\t\t}\n\t\trequire.Equal(t, expected, lastLogIndex)\n\t}\n\t// Ensure all the fsm logs are the same and that we have everything that was\n\t// part of the original snapshot, and that the contents after were\n\t// reverted.\n\tc.EnsureSame(t)\n\tfsm := 
getMockFSM(c.fsms[0])\n\tfsm.Lock()\n\tif len(fsm.logs) != 10 {\n\t\tt.Fatalf(\"Log length bad: %d\", len(fsm.logs))\n\t}\n\tfor i, entry := range fsm.logs {\n\t\texpected := []byte(fmt.Sprintf(\"test %d\", i))\n\t\trequire.Equal(t, entry, expected)\n\t}\n\tfsm.Unlock()\n\n\t// Commit some more things.\n\tfor i := 20; i < 30; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test %d\", i)), 0)\n\t}\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"Error Apply new log entries: %v\", err)\n\t}\n\tc.EnsureSame(t)\n}\n\nfunc TestRaft_UserRestore(t *testing.T) {\n\tcases := []uint64{\n\t\t0,\n\t\t1,\n\t\t2,\n\n\t\t// Snapshots from the future\n\t\t100,\n\t\t1000,\n\t\t10000,\n\t}\n\n\trestoreToNewClusterCases := []bool{false, true}\n\n\tfor _, c := range cases {\n\t\tfor _, restoreNewCluster := range restoreToNewClusterCases {\n\t\t\tt.Run(fmt.Sprintf(\"case %v | restored to new cluster: %t\", c, restoreNewCluster), func(t *testing.T) {\n\t\t\t\tsnapshotAndRestore(t, c, false, restoreNewCluster)\n\t\t\t})\n\t\t\tt.Run(fmt.Sprintf(\"monotonic case %v | restored to new cluster: %t\", c, restoreNewCluster), func(t *testing.T) {\n\t\t\t\tsnapshotAndRestore(t, c, true, restoreNewCluster)\n\t\t\t})\n\t\t}\n\t}\n}\n\nfunc TestRaft_SendSnapshotFollower(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.TrailingLogs = 10\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\n\t// Disconnect one follower\n\tfollowers := c.Followers()\n\tleader := c.Leader()\n\tbehind := followers[0]\n\tc.Disconnect(behind.localAddr)\n\n\t// Commit a lot of things\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t} else {\n\t\tt.Logf(\"[INFO] Finished apply without behind follower\")\n\t}\n\n\t// Snapshot, this will truncate logs!\n\tfor _, r := range c.rafts {\n\t\tfuture = 
r.Snapshot()\n\t\t// the disconnected node will have nothing to snapshot, so that's expected\n\t\tif err := future.Error(); err != nil && err != ErrNothingNewToSnapshot {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t}\n\n\t// Reconnect the behind node\n\tc.FullyConnect()\n\n\t// Ensure all the logs are the same\n\tc.EnsureSame(t)\n}\n\nfunc TestRaft_SendSnapshotAndLogsFollower(t *testing.T) {\n\t// Make the cluster\n\tconf := inmemConfig(t)\n\tconf.TrailingLogs = 10\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\n\t// Disconnect one follower\n\tfollowers := c.Followers()\n\tleader := c.Leader()\n\tbehind := followers[0]\n\tc.Disconnect(behind.localAddr)\n\n\t// Commit a lot of things\n\tvar future Future\n\tfor i := 0; i < 100; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t} else {\n\t\tt.Logf(\"[INFO] Finished apply without behind follower\")\n\t}\n\n\t// Snapshot, this will truncate logs!\n\tfor _, r := range c.rafts {\n\t\tfuture = r.Snapshot()\n\t\t// the disconnected node will have nothing to snapshot, so that's expected\n\t\tif err := future.Error(); err != nil && err != ErrNothingNewToSnapshot {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t}\n\n\t// Commit more logs past the snapshot.\n\tfor i := 100; i < 200; i++ {\n\t\tfuture = leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t} else {\n\t\tt.Logf(\"[INFO] Finished apply without behind follower\")\n\t}\n\n\t// Reconnect the behind node\n\tc.FullyConnect()\n\n\t// Ensure all the logs are the same\n\tc.EnsureSame(t)\n}\n\nfunc TestRaft_ReJoinFollower(t *testing.T) {\n\t// Enable operation after a remove.\n\tconf := inmemConfig(t)\n\tconf.ShutdownOnRemove = false\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\n\t// Get the 
leader.\n\tleader := c.Leader()\n\n\t// Wait until we have 2 followers.\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// Remove a follower.\n\tfollower := followers[0]\n\tfuture := leader.RemoveServer(follower.localID, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Other nodes should have fewer peers.\n\ttime.Sleep(c.propagateTimeout)\n\tif configuration := c.getConfiguration(leader); len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers: %v\", configuration)\n\t}\n\tif configuration := c.getConfiguration(followers[1]); len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers: %v\", configuration)\n\t}\n\n\t// Get the leader. We can't use the normal stability checker here because\n\t// the removed server will be trying to run an election but will be\n\t// ignored. The stability check will think this is off nominal because\n\t// the RequestVote RPCs won't stop firing.\n\tlimit = time.Now().Add(c.longstopTimeout)\n\tvar leaders []*Raft\n\tfor time.Now().Before(limit) && len(leaders) != 1 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tleaders, _ = c.pollState(Leader)\n\t}\n\tif len(leaders) != 1 {\n\t\tt.Fatalf(\"expected a leader\")\n\t}\n\tleader = leaders[0]\n\n\t// Rejoin. The follower will have a higher term than the leader,\n\t// this will cause the leader to step down, and a new round of elections\n\t// to take place. We should eventually re-stabilize.\n\tfuture = leader.AddVoter(follower.localID, follower.localAddr, 0, 0)\n\tif err := future.Error(); err != nil && err != ErrLeadershipLost {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// We should level back up to the proper number of peers. 
We add a\n\t// stability check here to make sure the cluster gets to a state where\n\t// there's a solid leader.\n\tleader = c.Leader()\n\tif configuration := c.getConfiguration(leader); len(configuration.Servers) != 3 {\n\t\tt.Fatalf(\"missing peers: %v\", configuration)\n\t}\n\tif configuration := c.getConfiguration(followers[1]); len(configuration.Servers) != 3 {\n\t\tt.Fatalf(\"missing peers: %v\", configuration)\n\t}\n\n\t// Should be a follower now.\n\tif follower.State() != Follower {\n\t\tt.Fatalf(\"bad state: %v\", follower.State())\n\t}\n}\n\nfunc TestRaft_LeaderLeaseExpire(t *testing.T) {\n\t// Make a cluster\n\tconf := inmemConfig(t)\n\tc := MakeCluster(2, t, conf)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Wait until we have a followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 1 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 1 {\n\t\tt.Fatalf(\"expected a followers: %v\", followers)\n\t}\n\n\t// Disconnect the follower now\n\tfollower := followers[0]\n\tt.Logf(\"[INFO] Disconnecting %v\", follower)\n\tc.Disconnect(follower.localAddr)\n\n\t// Watch the leaderCh\n\ttimeout := time.After(conf.LeaderLeaseTimeout * 2)\nLOOP:\n\tfor {\n\t\tselect {\n\t\tcase v := <-leader.LeaderCh():\n\t\t\tif !v {\n\t\t\t\tbreak LOOP\n\t\t\t}\n\t\tcase <-timeout:\n\t\t\tt.Fatalf(\"timeout stepping down as leader\")\n\t\t}\n\t}\n\n\t// Ensure the last contact of the leader is non-zero\n\tif leader.LastContact().IsZero() {\n\t\tt.Fatalf(\"expected non-zero contact time\")\n\t}\n\n\t// Should be no leaders\n\tif len(c.GetInState(Leader)) != 0 {\n\t\tt.Fatalf(\"expected step down\")\n\t}\n\n\t// Verify no further contact\n\tlast := follower.LastContact()\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Check that last contact has not changed\n\tif last != follower.LastContact() {\n\t\tt.Fatalf(\"unexpected 
further contact\")\n\t}\n\n\t// Ensure both have cleared their leader\n\tif l, id := leader.LeaderWithID(); l != \"\" && id != \"\" {\n\t\tt.Fatalf(\"bad: %v\", l)\n\t}\n\tif l, id := follower.LeaderWithID(); l != \"\" && id != \"\" {\n\t\tt.Fatalf(\"bad: %v\", l)\n\t}\n}\n\nfunc TestRaft_Barrier(t *testing.T) {\n\t// Make the cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Commit a lot of things\n\tfor i := 0; i < 100; i++ {\n\t\tleader.Apply(fmt.Appendf([]byte{}, \"test%d\", i), 0)\n\t}\n\n\t// Wait for a barrier complete\n\tbarrier := leader.Barrier(0)\n\n\t// Wait for the barrier future to apply\n\tif err := barrier.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure all the logs are the same\n\tc.EnsureSame(t)\n\tif len(getMockFSM(c.fsms[0]).logs) != 100 {\n\t\tt.Fatalf(\"Bad log length: %d\", len(getMockFSM(c.fsms[0]).logs))\n\t}\n}\n\nfunc TestRaft_VerifyLeader(t *testing.T) {\n\t// Make the cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Verify we are leader\n\tverify := leader.VerifyLeader()\n\n\t// Wait for the verify to apply\n\tif err := verify.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n}\n\nfunc TestRaft_VerifyLeader_Single(t *testing.T) {\n\t// Make the cluster\n\tc := MakeCluster(1, t, nil)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Verify we are leader\n\tverify := leader.VerifyLeader()\n\n\t// Wait for the verify to apply\n\tif err := verify.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n}\n\nfunc TestRaft_VerifyLeader_Fail(t *testing.T) {\n\t// Make a cluster\n\tconf := inmemConfig(t)\n\tc := MakeCluster(2, t, conf)\n\tdefer c.Close()\n\n\tleader := c.Leader()\n\t// Remove the leader election notification from the channel buffer\n\t<-leader.LeaderCh()\n\n\t// Wait until we have a followers\n\tfollowers := c.Followers()\n\n\t// Force 
follower to different term\n\tfollower := followers[0]\n\tfollower.setCurrentTerm(follower.getCurrentTerm() + 1)\n\n\t// Wait for the leader to step down\n\tselect {\n\tcase v := <-leader.LeaderCh():\n\t\tif v {\n\t\t\tt.Fatalf(\"expected the leader to step down\")\n\t\t}\n\tcase <-time.After(conf.HeartbeatTimeout * 3):\n\t\tc.FailNowf(\"timeout waiting for leader to step down\")\n\t}\n\n\t// Verify we are leader\n\tverify := leader.VerifyLeader()\n\n\tif err := verify.Error(); err != ErrNotLeader && err != ErrLeadershipLost {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Ensure the known leader is cleared\n\tif l, _ := leader.LeaderWithID(); l != \"\" {\n\t\tt.Fatalf(\"bad: %v\", l)\n\t}\n}\n\nfunc TestRaft_VerifyLeader_PartialConnect(t *testing.T) {\n\t// Make a cluster\n\tconf := inmemConfig(t)\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Wait until we have a followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers but got: %v\", followers)\n\t}\n\n\t// Force partial disconnect\n\tfollower := followers[0]\n\tt.Logf(\"[INFO] Disconnecting %v\", follower)\n\tc.Disconnect(follower.localAddr)\n\n\t// Verify we are leader\n\tverify := leader.VerifyLeader()\n\n\t// Wait for the leader to step down\n\tif err := verify.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n}\n\nfunc TestRaft_NotifyCh(t *testing.T) {\n\tch := make(chan bool, 1)\n\tconf := inmemConfig(t)\n\tconf.NotifyCh = ch\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\n\t// Watch leaderCh for change\n\tselect {\n\tcase v := <-ch:\n\t\tif !v {\n\t\t\tt.Fatalf(\"should become leader\")\n\t\t}\n\tcase <-time.After(conf.HeartbeatTimeout * 8):\n\t\tt.Fatalf(\"timeout becoming leader\")\n\t}\n\n\t// Close 
the cluster\n\tc.Close()\n\n\t// Watch the notify channel for change\n\tselect {\n\tcase v := <-ch:\n\t\tif v {\n\t\t\tt.Fatalf(\"should step down as leader\")\n\t\t}\n\tcase <-time.After(conf.HeartbeatTimeout * 6):\n\t\tt.Fatalf(\"timeout on step down as leader\")\n\t}\n}\n\nfunc TestRaft_AppendEntry(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\tfollowers := c.Followers()\n\tldr := c.Leader()\n\tldrT := c.trans[c.IndexOf(ldr)]\n\n\treqAppendEntries := AppendEntriesRequest{\n\t\tRPCHeader:    ldr.getRPCHeader(),\n\t\tTerm:         ldr.getCurrentTerm() + 1,\n\t\tPrevLogEntry: 0,\n\t\tPrevLogTerm:  ldr.getCurrentTerm(),\n\t\tLeader:       nil,\n\t\tEntries: []*Log{\n\t\t\t{\n\t\t\t\tIndex: 1,\n\t\t\t\tTerm:  ldr.getCurrentTerm() + 1,\n\t\t\t\tType:  LogCommand,\n\t\t\t\tData:  []byte(\"log 1\"),\n\t\t\t},\n\t\t},\n\t\tLeaderCommitIndex: 90,\n\t}\n\t// the follower should accept entries when the Leader field is nil and only the RPC header is set.\n\tvar resp AppendEntriesResponse\n\tif err := ldrT.AppendEntries(followers[0].localID, followers[0].localAddr, &reqAppendEntries, &resp); err != nil {\n\t\tt.Fatalf(\"RequestVote RPC failed %v\", err)\n\t}\n\n\trequire.True(t, resp.Success)\n\n\theaders := ldr.getRPCHeader()\n\theaders.ID = nil\n\theaders.Addr = nil\n\treqAppendEntries = AppendEntriesRequest{\n\t\tRPCHeader:    headers,\n\t\tTerm:         ldr.getCurrentTerm() + 1,\n\t\tPrevLogEntry: 0,\n\t\tPrevLogTerm:  ldr.getCurrentTerm(),\n\t\tLeader:       ldr.trans.EncodePeer(ldr.config().LocalID, ldr.localAddr),\n\t\tEntries: []*Log{\n\t\t\t{\n\t\t\t\tIndex: 1,\n\t\t\t\tTerm:  ldr.getCurrentTerm() + 1,\n\t\t\t\tType:  LogCommand,\n\t\t\t\tData:  []byte(\"log 1\"),\n\t\t\t},\n\t\t},\n\t\tLeaderCommitIndex: 90,\n\t}\n\t// the follower should also accept entries when the header ID and Addr are nil and only the Leader field is set.\n\tvar resp2 AppendEntriesResponse\n\tif err := ldrT.AppendEntries(followers[0].localID, followers[0].localAddr, &reqAppendEntries, &resp2); err != nil {\n\t\tt.Fatalf(\"RequestVote RPC failed 
%v\", err)\n\t}\n\n\trequire.True(t, resp2.Success)\n}\n\n// TestRaft_PreVoteMixedCluster focuses on testing a cluster with\n// a mix of nodes that have pre-vote activated and deactivated.\n// Once the cluster is created, we force an election by partitioning the leader\n// and verify that the cluster regains stability.\nfunc TestRaft_PreVoteMixedCluster(t *testing.T) {\n\n\ttcs := []struct {\n\t\tname         string\n\t\tprevoteNum   int\n\t\tnoprevoteNum int\n\t}{\n\t\t{\"majority no pre-vote\", 2, 3},\n\t\t{\"majority pre-vote\", 3, 2},\n\t\t{\"majority no pre-vote\", 1, 2},\n\t\t{\"majority pre-vote\", 2, 1},\n\t\t{\"all pre-vote\", 3, 0},\n\t\t{\"all no pre-vote\", 0, 3},\n\t}\n\tfor _, tc := range tcs {\n\t\tt.Run(tc.name, func(t *testing.T) {\n\n\t\t\t// Make majority cluster.\n\t\t\tmajority := tc.prevoteNum\n\t\t\tminority := tc.noprevoteNum\n\t\t\tif tc.prevoteNum < tc.noprevoteNum {\n\t\t\t\tmajority = tc.noprevoteNum\n\t\t\t\tminority = tc.prevoteNum\n\t\t\t}\n\n\t\t\tconf := inmemConfig(t)\n\t\t\tconf.PreVoteDisabled = tc.prevoteNum <= tc.noprevoteNum\n\t\t\tc := MakeCluster(majority, t, conf)\n\t\t\tdefer c.Close()\n\n\t\t\t// Set up the minority nodes with the opposite pre-vote setting.\n\t\t\tconf = inmemConfig(t)\n\t\t\tconf.PreVoteDisabled = tc.prevoteNum >= tc.noprevoteNum\n\t\t\tc1 := MakeClusterNoBootstrap(minority, t, conf)\n\n\t\t\t// Merge clusters.\n\t\t\tc.Merge(c1)\n\t\t\tc.FullyConnect()\n\n\t\t\tfor _, r := range c1.rafts {\n\t\t\t\tfuture := c.Leader().AddVoter(r.localID, r.localAddr, 0, 0)\n\t\t\t\tif err := future.Error(); err != nil {\n\t\t\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t\t\t}\n\t\t\t}\n\t\t\ttime.Sleep(c.propagateTimeout * 10)\n\n\t\t\tleaderOld := c.Leader()\n\t\t\tc.Followers()\n\t\t\tc.Partition([]ServerAddress{leaderOld.localAddr})\n\t\t\ttime.Sleep(c.propagateTimeout * 3)\n\t\t\tleader := c.Leader()\n\t\t\trequire.NotEqual(t, leader.leaderID, leaderOld.leaderID)\n\t\t})\n\t}\n\n}\n\nfunc TestRaft_PreVoteAvoidElectionWithPartition(t 
*testing.T) {\n\t// Make a prevote cluster.\n\tconf := inmemConfig(t)\n\tconf.PreVoteDisabled = false\n\tc := MakeCluster(5, t, conf)\n\tdefer c.Close()\n\n\toldLeaderTerm := c.Leader().getCurrentTerm()\n\tfollowers := c.Followers()\n\trequire.Len(t, followers, 4)\n\n\t//Partition a node and wait enough for it to increase its term\n\tc.Partition([]ServerAddress{followers[0].localAddr})\n\ttime.Sleep(10 * c.propagateTimeout)\n\n\t// Check the leader is stable and the followers are as expected\n\tleaderTerm := c.Leader().getCurrentTerm()\n\trequire.Equal(t, leaderTerm, oldLeaderTerm)\n\trequire.Len(t, c.WaitForFollowers(3), 3)\n\n\t// reconnect the partitioned node\n\tc.FullyConnect()\n\ttime.Sleep(3 * c.propagateTimeout)\n\n\t// Check that the number of followers increase and the term is not increased\n\trequire.Len(t, c.Followers(), 4)\n\tleaderTerm = c.Leader().getCurrentTerm()\n\trequire.Equal(t, leaderTerm, oldLeaderTerm)\n\n}\n\nfunc TestRaft_VotingGrant_WhenLeaderAvailable(t *testing.T) {\n\tconf := inmemConfig(t)\n\tconf.ProtocolVersion = 3\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\tfollowers := c.Followers()\n\tldr := c.Leader()\n\tldrT := c.trans[c.IndexOf(ldr)]\n\n\treqVote := RequestVoteRequest{\n\t\tRPCHeader:          ldr.getRPCHeader(),\n\t\tTerm:               ldr.getCurrentTerm() + 10,\n\t\tLastLogIndex:       ldr.LastIndex(),\n\t\tCandidate:          ldrT.EncodePeer(ldr.localID, ldr.localAddr),\n\t\tLastLogTerm:        ldr.getCurrentTerm(),\n\t\tLeadershipTransfer: false,\n\t}\n\t// a follower that thinks there's a leader should vote for that leader.\n\tvar resp RequestVoteResponse\n\tif err := ldrT.RequestVote(followers[0].localID, followers[0].localAddr, &reqVote, &resp); err != nil {\n\t\tt.Fatalf(\"RequestVote RPC failed %v\", err)\n\t}\n\tif !resp.Granted {\n\t\tt.Fatalf(\"expected vote to be granted, but wasn't %+v\", resp)\n\t}\n\t// a follower that thinks there's a leader shouldn't vote for a different candidate\n\treqVote.Addr = 
ldrT.EncodePeer(followers[0].localID, followers[0].localAddr)\n\treqVote.Candidate = ldrT.EncodePeer(followers[0].localID, followers[0].localAddr)\n\tif err := ldrT.RequestVote(followers[1].localID, followers[1].localAddr, &reqVote, &resp); err != nil {\n\t\tt.Fatalf(\"RequestVote RPC failed %v\", err)\n\t}\n\tif resp.Granted {\n\t\tt.Fatalf(\"expected vote not to be granted, but was %+v\", resp)\n\t}\n\t// a follower that thinks there's a leader, but the request has the leadership transfer flag, should\n\t// vote for a different candidate\n\treqVote.LeadershipTransfer = true\n\treqVote.Addr = ldrT.EncodePeer(followers[0].localID, followers[0].localAddr)\n\treqVote.Candidate = ldrT.EncodePeer(followers[0].localID, followers[0].localAddr)\n\tif err := ldrT.RequestVote(followers[1].localID, followers[1].localAddr, &reqVote, &resp); err != nil {\n\t\tt.Fatalf(\"RequestVote RPC failed %v\", err)\n\t}\n\tif !resp.Granted {\n\t\tt.Fatalf(\"expected vote to be granted, but wasn't %+v\", resp)\n\t}\n}\n\nfunc TestRaft_ProtocolVersion_RejectRPC(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\tfollowers := c.Followers()\n\tldr := c.Leader()\n\tldrT := c.trans[c.IndexOf(ldr)]\n\n\treqVote := RequestVoteRequest{\n\t\tRPCHeader: RPCHeader{\n\t\t\tProtocolVersion: ProtocolVersionMax + 1,\n\t\t\tAddr:            ldrT.EncodePeer(ldr.localID, ldr.localAddr),\n\t\t},\n\t\tTerm:         ldr.getCurrentTerm() + 10,\n\t\tLastLogIndex: ldr.LastIndex(),\n\t\tLastLogTerm:  ldr.getCurrentTerm(),\n\t}\n\n\t// Reject a message from a future version we don't understand.\n\tvar resp RequestVoteResponse\n\terr := ldrT.RequestVote(followers[0].localID, followers[0].localAddr, &reqVote, &resp)\n\tif err == nil || !strings.Contains(err.Error(), \"protocol version\") {\n\t\tt.Fatalf(\"expected RPC to get rejected: %v\", err)\n\t}\n\n\t// Reject a message that's too old.\n\treqVote.ProtocolVersion = followers[0].protocolVersion - 2\n\terr = ldrT.RequestVote(followers[0].localID, 
followers[0].localAddr, &reqVote, &resp)\n\tif err == nil || !strings.Contains(err.Error(), \"protocol version\") {\n\t\tt.Fatalf(\"expected RPC to get rejected: %v\", err)\n\t}\n}\n\nfunc TestRaft_ProtocolVersion_Upgrade_1_2(t *testing.T) {\n\t// Make a cluster back on protocol version 1.\n\tconf := inmemConfig(t)\n\tconf.ProtocolVersion = 1\n\tc := MakeCluster(2, t, conf)\n\tdefer c.Close()\n\n\t// Set up another server speaking protocol version 2.\n\tconf = inmemConfig(t)\n\tconf.ProtocolVersion = 2\n\tc1 := MakeClusterNoBootstrap(1, t, conf)\n\n\t// Merge clusters.\n\tc.Merge(c1)\n\tc.FullyConnect()\n\n\t// Make sure the new ID-based operations aren't supported in the old\n\t// protocol.\n\tfuture := c.Leader().AddNonvoter(c1.rafts[0].localID, c1.rafts[0].localAddr, 0, 1*time.Second)\n\tif err := future.Error(); err != ErrUnsupportedProtocol {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tfuture = c.Leader().DemoteVoter(c1.rafts[0].localID, 0, 1*time.Second)\n\tif err := future.Error(); err != ErrUnsupportedProtocol {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Now do the join using the old address-based API.\n\tif future := c.Leader().AddPeer(c1.rafts[0].localAddr); future.Error() != nil {\n\t\tt.Fatalf(\"err: %v\", future.Error())\n\t}\n\n\t// Sanity check the cluster.\n\tc.EnsureSame(t)\n\tc.EnsureSamePeers(t)\n\tc.EnsureLeader(t, c.Leader().localAddr)\n\n\t// Now do the remove using the old address-based API.\n\tif future := c.Leader().RemovePeer(c1.rafts[0].localAddr); future.Error() != nil {\n\t\tt.Fatalf(\"err: %v\", future.Error())\n\t}\n}\n\nfunc TestRaft_ProtocolVersion_Upgrade_2_3(t *testing.T) {\n\t// Make a cluster back on protocol version 2.\n\tconf := inmemConfig(t)\n\tconf.ProtocolVersion = 2\n\tc := MakeCluster(2, t, conf)\n\tdefer c.Close()\n\toldAddr := c.Followers()[0].localAddr\n\n\t// Set up another server speaking protocol version 3.\n\tconf = inmemConfig(t)\n\tconf.ProtocolVersion = 3\n\tc1 := MakeClusterNoBootstrap(1, t, conf)\n\n\t// Merge 
clusters.\n\tc.Merge(c1)\n\tc.FullyConnect()\n\n\t// Use the new ID-based API to add the server with its ID.\n\tfuture := c.Leader().AddVoter(c1.rafts[0].localID, c1.rafts[0].localAddr, 0, 1*time.Second)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Sanity check the cluster.\n\tc.EnsureSame(t)\n\tc.EnsureSamePeers(t)\n\tc.EnsureLeader(t, c.Leader().localAddr)\n\n\t// Remove an old server using the old address-based API.\n\tif future := c.Leader().RemovePeer(oldAddr); future.Error() != nil {\n\t\tt.Fatalf(\"err: %v\", future.Error())\n\t}\n}\n\nfunc TestRaft_LeaderID_Propagated(t *testing.T) {\n\t// Make a cluster on protocol version 3.\n\tconf := inmemConfig(t)\n\tc := MakeCluster(3, t, conf)\n\tdefer c.Close()\n\terr := waitForLeader(c)\n\trequire.NoError(t, err)\n\n\tfor _, n := range c.rafts {\n\t\trequire.Equal(t, ProtocolVersion(3), n.protocolVersion)\n\t\taddr, id := n.LeaderWithID()\n\t\trequire.NotEmpty(t, id)\n\t\trequire.NotEmpty(t, addr)\n\t}\n\tfor i := 0; i < 5; i++ {\n\t\tfuture := c.Leader().Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t\tif err := future.Error(); err != nil {\n\t\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t\t}\n\t}\n\t// Wait a while\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Sanity check the cluster.\n\tc.EnsureSame(t)\n\tc.EnsureSamePeers(t)\n\tc.EnsureLeader(t, c.Leader().localAddr)\n}\n\nfunc TestRaft_LeadershipTransferInProgress(t *testing.T) {\n\tr := &Raft{leaderState: leaderState{}}\n\tr.setupLeaderState()\n\n\tif r.getLeadershipTransferInProgress() != false {\n\t\tt.Errorf(\"should be true after setup\")\n\t}\n\n\tr.setLeadershipTransferInProgress(true)\n\tif r.getLeadershipTransferInProgress() != true {\n\t\tt.Errorf(\"should be true because we set it before\")\n\t}\n\tr.setLeadershipTransferInProgress(false)\n\tif r.getLeadershipTransferInProgress() != false {\n\t\tt.Errorf(\"should be false because we set it before\")\n\t}\n}\n\nfunc pointerToString(s string) *string {\n\treturn 
&s\n}\n\nfunc TestRaft_LeadershipTransferPickServer(t *testing.T) {\n\ttype variant struct {\n\t\tlastLogIndex int\n\t\tservers      map[string]uint64\n\t\texpected     *string\n\t}\n\tleaderID := \"z\"\n\tvariants := []variant{\n\t\t{lastLogIndex: 10, servers: map[string]uint64{}, expected: nil},\n\t\t{lastLogIndex: 10, servers: map[string]uint64{leaderID: 11, \"a\": 9}, expected: pointerToString(\"a\")},\n\t\t{lastLogIndex: 10, servers: map[string]uint64{leaderID: 11, \"a\": 9, \"b\": 8}, expected: pointerToString(\"a\")},\n\t\t{lastLogIndex: 10, servers: map[string]uint64{leaderID: 11, \"c\": 9, \"b\": 8, \"a\": 8}, expected: pointerToString(\"c\")},\n\t\t{lastLogIndex: 10, servers: map[string]uint64{leaderID: 11, \"a\": 7, \"b\": 11, \"c\": 8}, expected: pointerToString(\"b\")},\n\t}\n\tfor i, v := range variants {\n\t\tservers := []Server{}\n\t\treplState := map[ServerID]*followerReplication{}\n\t\tfor id, idx := range v.servers {\n\t\t\tservers = append(servers, Server{ID: ServerID(id)})\n\t\t\treplState[ServerID(id)] = &followerReplication{nextIndex: idx}\n\t\t}\n\t\tr := Raft{leaderState: leaderState{}, localID: ServerID(leaderID), configurations: configurations{latest: Configuration{Servers: servers}}}\n\t\tr.lastLogIndex = uint64(v.lastLogIndex)\n\t\tr.leaderState.replState = replState\n\n\t\tactual := r.pickServer()\n\t\tif v.expected == nil && actual == nil {\n\t\t\tcontinue\n\t\t} else if v.expected == nil && actual != nil {\n\t\t\tt.Errorf(\"case %d: actual: %v doesn't match expected: %v\", i, actual, v.expected)\n\t\t} else if actual == nil && v.expected != nil {\n\t\t\tt.Errorf(\"case %d: actual: %v doesn't match expected: %v\", i, actual, v.expected)\n\t\t} else if string(actual.ID) != *v.expected {\n\t\t\tt.Errorf(\"case %d: actual: %v doesn't match expected: %v\", i, actual.ID, *v.expected)\n\t\t}\n\t}\n}\n\nfunc TestRaft_LeadershipTransfer(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\toldLeader := 
string(c.Leader().localID)\n\terr := c.Leader().LeadershipTransfer()\n\tif err.Error() != nil {\n\t\tt.Fatalf(\"Didn't expect error: %v\", err.Error())\n\t}\n\tnewLeader := string(c.Leader().localID)\n\tif oldLeader == newLeader {\n\t\tt.Error(\"Leadership should have been transitioned to another peer.\")\n\t}\n}\n\nfunc TestRaft_LeadershipTransferWithOneNode(t *testing.T) {\n\tc := MakeCluster(1, t, nil)\n\tdefer c.Close()\n\n\tfuture := c.Leader().LeadershipTransfer()\n\tif future.Error() == nil {\n\t\tt.Fatal(\"leadership transfer should err\")\n\t}\n\n\texpected := \"cannot find peer\"\n\tactual := future.Error().Error()\n\tif !strings.Contains(actual, expected) {\n\t\tt.Errorf(\"leadership transfer should err with: %s\", expected)\n\t}\n}\n\nfunc TestRaft_LeadershipTransferWithWrites(t *testing.T) {\n\tconf := inmemConfig(t)\n\tconf.Logger = hclog.New(&hclog.LoggerOptions{Level: hclog.Trace})\n\tc := MakeCluster(7, t, conf)\n\tdefer c.Close()\n\n\tdoneCh := make(chan struct{})\n\tvar writerErr error\n\tvar wg sync.WaitGroup\n\tvar writes int\n\twg.Add(1)\n\tleader := c.Leader()\n\tgo func() {\n\t\tdefer wg.Done()\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-doneCh:\n\t\t\t\treturn\n\t\t\tdefault:\n\t\t\t\tfuture := leader.Apply([]byte(\"test\"), 0)\n\t\t\t\tswitch err := future.Error(); {\n\t\t\t\tcase errors.Is(err, ErrRaftShutdown):\n\t\t\t\t\treturn\n\t\t\t\tcase errors.Is(err, ErrNotLeader):\n\t\t\t\t\tleader = c.Leader()\n\t\t\t\tcase errors.Is(err, ErrLeadershipTransferInProgress):\n\t\t\t\t\tcontinue\n\t\t\t\tcase errors.Is(err, ErrLeadershipLost):\n\t\t\t\t\tcontinue\n\t\t\t\tcase err == nil:\n\t\t\t\t\twrites++\n\t\t\t\tdefault:\n\t\t\t\t\twriterErr = err\n\t\t\t\t}\n\t\t\t\ttime.Sleep(time.Millisecond)\n\t\t\t}\n\t\t}\n\t}()\n\n\tfollower := c.Followers()[0]\n\tfuture := c.Leader().LeadershipTransferToServer(follower.localID, follower.localAddr)\n\tif future.Error() != nil {\n\t\tt.Fatalf(\"Didn't expect error: %v\", future.Error())\n\t}\n\tif 
follower.localID != c.Leader().localID {\n\t\tt.Error(\"Leadership should have been transitioned to specified server.\")\n\t}\n\tclose(doneCh)\n\twg.Wait()\n\tif writerErr != nil {\n\t\tt.Fatal(writerErr)\n\t}\n\tt.Logf(\"writes: %d\", writes)\n}\n\nfunc TestRaft_LeadershipTransferWithSevenNodes(t *testing.T) {\n\tc := MakeCluster(7, t, nil)\n\tdefer c.Close()\n\n\tfollower := c.GetInState(Follower)[0]\n\tfuture := c.Leader().LeadershipTransferToServer(follower.localID, follower.localAddr)\n\tif future.Error() != nil {\n\t\tt.Fatalf(\"Didn't expect error: %v\", future.Error())\n\t}\n\tif follower.localID != c.Leader().localID {\n\t\tt.Error(\"Leadership should have been transitioned to specified server.\")\n\t}\n}\n\nfunc TestRaft_LeadershipTransferToInvalidID(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\tfuture := c.Leader().LeadershipTransferToServer(ServerID(\"abc\"), ServerAddress(\"localhost\"))\n\tif future.Error() == nil {\n\t\tt.Fatal(\"leadership transfer should err\")\n\t}\n\n\texpected := \"cannot find replication state\"\n\tactual := future.Error().Error()\n\tif !strings.Contains(actual, expected) {\n\t\tt.Errorf(\"leadership transfer should err with: %s\", expected)\n\t}\n}\n\nfunc TestRaft_LeadershipTransferToInvalidAddress(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\tfollower := c.GetInState(Follower)[0]\n\tfuture := c.Leader().LeadershipTransferToServer(follower.localID, ServerAddress(\"localhost\"))\n\tif future.Error() == nil {\n\t\tt.Fatal(\"leadership transfer should err\")\n\t}\n\texpected := \"failed to make TimeoutNow RPC\"\n\tactual := future.Error().Error()\n\tif !strings.Contains(actual, expected) {\n\t\tt.Errorf(\"leadership transfer should err with: %s\", expected)\n\t}\n}\n\nfunc TestRaft_LeadershipTransferToBehindServer(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\tl := c.Leader()\n\tbehind := c.GetInState(Follower)[0]\n\n\t// Commit a lot of things\n\tfor i := 
0; i < 1000; i++ {\n\t\tl.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\tfuture := l.LeadershipTransferToServer(behind.localID, behind.localAddr)\n\tif future.Error() != nil {\n\t\tt.Fatalf(\"This is not supposed to error: %v\", future.Error())\n\t}\n\tif c.Leader().localID != behind.localID {\n\t\tt.Fatal(\"Behind server did not get leadership\")\n\t}\n}\n\nfunc TestRaft_LeadershipTransferToItself(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\tl := c.Leader()\n\n\tfuture := l.LeadershipTransferToServer(l.localID, l.localAddr)\n\tif future.Error() == nil {\n\t\tt.Fatal(\"leadership transfer should err\")\n\t}\n\texpected := \"cannot transfer leadership to itself\"\n\tactual := future.Error().Error()\n\tif !strings.Contains(actual, expected) {\n\t\tt.Errorf(\"leadership transfer should err with: %s\", expected)\n\t}\n}\n\nfunc TestRaft_LeadershipTransferLeaderRejectsClientRequests(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\tl := c.Leader()\n\tl.setLeadershipTransferInProgress(true)\n\n\t// tests for API > protocol version 3 is missing here because leadership transfer\n\t// is only available for protocol version >= 3\n\t// TODO: is something missing here?\n\tfutures := []Future{\n\t\tl.AddNonvoter(ServerID(\"\"), ServerAddress(\"\"), 0, 0),\n\t\tl.AddVoter(ServerID(\"\"), ServerAddress(\"\"), 0, 0),\n\t\tl.Apply([]byte(\"test\"), 0),\n\t\tl.Barrier(0),\n\t\tl.DemoteVoter(ServerID(\"\"), 0, 0),\n\n\t\t// the API is tested, but here we are making sure we reject any config change.\n\t\tl.requestConfigChange(configurationChangeRequest{}, 100*time.Millisecond),\n\t}\n\tfutures = append(futures, l.LeadershipTransfer())\n\n\tfor i, f := range futures {\n\t\tt.Logf(\"waiting on future %v\", i)\n\t\tif f.Error() != ErrLeadershipTransferInProgress {\n\t\t\tt.Errorf(\"case %d: should have errored with: %s, instead of %s\", i, ErrLeadershipTransferInProgress, f.Error())\n\t\t}\n\t}\n\n\tf := 
l.LeadershipTransferToServer(ServerID(\"\"), ServerAddress(\"\"))\n\tif f.Error() != ErrLeadershipTransferInProgress {\n\t\tt.Errorf(\"should have errored with: %s, instead of %s\", ErrLeadershipTransferInProgress, f.Error())\n\t}\n}\n\nfunc TestRaft_LeadershipTransferLeaderReplicationTimeout(t *testing.T) {\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\n\tl := c.Leader()\n\tbehind := c.GetInState(Follower)[0]\n\n\t// Commit a lot of things, so that the timeout can kick in\n\tfor i := 0; i < 10000; i++ {\n\t\tl.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t}\n\n\t// set ElectionTimeout really short because this is used to determine\n\t// how long a transfer is allowed to take.\n\tcfg := l.config()\n\tcfg.ElectionTimeout = 1 * time.Nanosecond\n\tl.conf.Store(cfg)\n\n\tfuture := l.LeadershipTransferToServer(behind.localID, behind.localAddr)\n\tif future.Error() == nil {\n\t\tt.Log(\"This test is fishing for a replication timeout, but this is not guaranteed to happen.\")\n\t} else {\n\t\texpected := \"leadership transfer timeout\"\n\t\tactual := future.Error().Error()\n\t\tif !strings.Contains(actual, expected) {\n\t\t\tt.Errorf(\"leadership transfer should err with: %s\", expected)\n\t\t}\n\t}\n}\n\nfunc TestRaft_LeadershipTransferIgnoresNonvoters(t *testing.T) {\n\tc := MakeCluster(2, t, nil)\n\tdefer c.Close()\n\n\tfollower := c.Followers()[0]\n\n\tdemoteFuture := c.Leader().DemoteVoter(follower.localID, 0, 0)\n\tif demoteFuture.Error() != nil {\n\t\tt.Fatalf(\"demote voter err'd: %v\", demoteFuture.Error())\n\t}\n\n\tfuture := c.Leader().LeadershipTransfer()\n\tif future.Error() == nil {\n\t\tt.Fatal(\"leadership transfer should err\")\n\t}\n\n\texpected := \"cannot find peer\"\n\tactual := future.Error().Error()\n\tif !strings.Contains(actual, expected) {\n\t\tt.Errorf(\"leadership transfer should err with: %s\", expected)\n\t}\n}\n\nfunc TestRaft_LeadershipTransferStopRightAway(t *testing.T) {\n\tr := Raft{leaderState: leaderState{}, logger: 
hclog.New(nil)}\n\tr.setupLeaderState()\n\n\tstopCh := make(chan struct{})\n\tdoneCh := make(chan error, 1)\n\tclose(stopCh)\n\tr.leadershipTransfer(ServerID(\"a\"), ServerAddress(\"\"), &followerReplication{}, stopCh, doneCh)\n\terr := <-doneCh\n\tif err != nil {\n\t\tt.Errorf(\"leadership shouldn't have started, but instead it error with: %v\", err)\n\t}\n}\n\nfunc TestRaft_GetConfigurationNoBootstrap(t *testing.T) {\n\tc := MakeCluster(2, t, nil)\n\tdefer c.Close()\n\n\t// Should be one leader\n\tc.Followers()\n\tleader := c.Leader()\n\tc.EnsureLeader(t, leader.localAddr)\n\n\t// Should be able to apply\n\tfuture := leader.Apply([]byte(\"test\"), c.conf.CommitTimeout)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t}\n\tc.WaitForReplication(1)\n\n\t// Get configuration via GetConfiguration of a running node\n\tcfgf := c.rafts[0].GetConfiguration()\n\tif err := cfgf.Error(); err != nil {\n\t\tt.Fatal(err)\n\t}\n\texpected := cfgf.Configuration()\n\n\t// Obtain the same configuration via GetConfig\n\tlogs := c.stores[0]\n\tstore := c.stores[0]\n\tsnap := c.snaps[0]\n\ttrans := c.trans[0]\n\tobserved, err := GetConfiguration(c.conf, c.fsms[0], logs, store, snap, trans)\n\tif err != nil {\n\t\tt.Fatal(err)\n\t}\n\tif !reflect.DeepEqual(observed, expected) {\n\t\tt.Errorf(\"GetConfiguration result differ from Raft.GetConfiguration: observed %+v, expected %+v\", observed, expected)\n\t}\n}\n\nfunc TestRaft_LogStoreIsMonotonic(t *testing.T) {\n\tc := MakeCluster(1, t, nil)\n\tdefer c.Close()\n\n\t// Should be one leader\n\tleader := c.Leader()\n\tc.EnsureLeader(t, leader.localAddr)\n\n\t// Test the monotonic type assertion on the InmemStore.\n\t_, ok := leader.logs.(MonotonicLogStore)\n\tassert.False(t, ok)\n\n\tvar log LogStore\n\n\t// Wrapping the non-monotonic store as a LogCache should make it pass the\n\t// type assertion, but the underlying store is still non-monotonic.\n\tlog, _ = NewLogCache(100, leader.logs)\n\tmcast, ok := 
log.(MonotonicLogStore)\n\trequire.True(t, ok)\n\tassert.False(t, mcast.IsMonotonic())\n\n\t// Now create a new MockMonotonicLogStore using the leader logs and expect\n\t// it to work.\n\tlog = &MockMonotonicLogStore{s: leader.logs}\n\tmcast, ok = log.(MonotonicLogStore)\n\trequire.True(t, ok)\n\tassert.True(t, mcast.IsMonotonic())\n\n\t// Wrap the mock logstore in a LogCache and check again.\n\tlog, _ = NewLogCache(100, log)\n\tmcast, ok = log.(MonotonicLogStore)\n\trequire.True(t, ok)\n\tassert.True(t, mcast.IsMonotonic())\n}\n\nfunc TestRaft_CacheLogWithStoreError(t *testing.T) {\n\tc := MakeCluster(2, t, nil)\n\tdefer c.Close()\n\n\t// Should be one leader\n\tfollower := c.Followers()[0]\n\tleader := c.Leader()\n\tc.EnsureLeader(t, leader.localAddr)\n\n\t// There is no lock to protect this assignment I am afraid.\n\tes := &errorStore{LogStore: follower.logs}\n\tcl, _ := NewLogCache(100, es)\n\tfollower.logs = cl\n\n\t// Commit some logs\n\tfor i := 0; i < 5; i++ {\n\t\tfuture := leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t\tif err := future.Error(); err != nil {\n\t\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t\t}\n\t}\n\n\t// Make the next fail\n\tes.failNext(1)\n\tleader.Apply([]byte(\"test6\"), 0)\n\n\tleader.Apply([]byte(\"test7\"), 0)\n\tfuture := leader.Apply([]byte(\"test8\"), 0)\n\n\t// Wait for the last future to apply\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"[ERR] err: %v\", err)\n\t}\n\n\t// Shutdown follower\n\tif f := follower.Shutdown(); f.Error() != nil {\n\t\tt.Fatalf(\"error shutting down follower: %v\", f.Error())\n\t}\n\n\t// Try to restart the follower and make sure it does not fail with a LogNotFound error\n\t_, trans := NewInmemTransport(follower.localAddr)\n\tfollower.logs = es.LogStore\n\tconf := follower.config()\n\tn, err := NewRaft(&conf, &MockFSM{}, follower.logs, follower.stable, follower.snapshots, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"error restarting follower: %v\", err)\n\t}\n\tn.Shutdown()\n}\n\nfunc 
TestRaft_ReloadConfig(t *testing.T) {\n\tconf := inmemConfig(t)\n\tconf.LeaderLeaseTimeout = 40 * time.Millisecond\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\traft := c.rafts[0]\n\n\t// Make sure the reloadable values are as expected before\n\trequire.Equal(t, uint64(10240), raft.config().TrailingLogs)\n\trequire.Equal(t, 120*time.Second, raft.config().SnapshotInterval)\n\trequire.Equal(t, uint64(8192), raft.config().SnapshotThreshold)\n\n\t// Reload with different values\n\tnewCfg := ReloadableConfig{\n\t\tTrailingLogs:      12345,\n\t\tSnapshotInterval:  234 * time.Second,\n\t\tSnapshotThreshold: 6789,\n\t\tHeartbeatTimeout:  45 * time.Millisecond,\n\t\tElectionTimeout:   46 * time.Millisecond,\n\t}\n\n\trequire.NoError(t, raft.ReloadConfig(newCfg))\n\n\t// Now we should have new values\n\trequire.Equal(t, newCfg.TrailingLogs, raft.config().TrailingLogs)\n\trequire.Equal(t, newCfg.SnapshotInterval, raft.config().SnapshotInterval)\n\trequire.Equal(t, newCfg.SnapshotThreshold, raft.config().SnapshotThreshold)\n\trequire.Equal(t, newCfg.HeartbeatTimeout, raft.config().HeartbeatTimeout)\n\trequire.Equal(t, newCfg.ElectionTimeout, raft.config().ElectionTimeout)\n}\n\nfunc TestRaft_ReloadConfigValidates(t *testing.T) {\n\tconf := inmemConfig(t)\n\tc := MakeCluster(1, t, conf)\n\tdefer c.Close()\n\traft := c.rafts[0]\n\n\t// Make sure the reloadable values are as expected before\n\trequire.Equal(t, uint64(10240), raft.config().TrailingLogs)\n\trequire.Equal(t, 120*time.Second, raft.config().SnapshotInterval)\n\trequire.Equal(t, uint64(8192), raft.config().SnapshotThreshold)\n\n\t// Reload with different values that are invalid per ValidateConfig\n\tnewCfg := ReloadableConfig{\n\t\tTrailingLogs:      12345,\n\t\tSnapshotInterval:  1 * time.Millisecond, // must be >= 5 millisecond\n\t\tSnapshotThreshold: 6789,\n\t}\n\n\trequire.Error(t, raft.ReloadConfig(newCfg))\n\n\t// Now we should have same values\n\trequire.Equal(t, uint64(10240), 
raft.config().TrailingLogs)\n\trequire.Equal(t, 120*time.Second, raft.config().SnapshotInterval)\n\trequire.Equal(t, uint64(8192), raft.config().SnapshotThreshold)\n}\n\n// TODO: These are test cases we'd like to write for appendEntries().\n// Unfortunately, it's difficult to do so with the current way this file is\n// tested.\n//\n// Term check:\n// - m.term is too small: no-op.\n// - m.term is too large: update term, become follower, process request.\n// - m.term is right but we're candidate: become follower, process request.\n//\n// Previous entry check:\n// - prev is within the snapshot, before the snapshot's index: assume match.\n// - prev is within the snapshot, exactly the snapshot's index: check\n//   snapshot's term.\n// - prev is a log entry: check entry's term.\n// - prev is past the end of the log: return fail.\n//\n// New entries:\n// - new entries are all new: add them all.\n// - new entries are all duplicate: ignore them all without ever removing dups.\n// - new entries some duplicate, some new: add the new ones without ever\n//   removing dups.\n// - new entries all conflict: remove the conflicting ones, add their\n//   replacements.\n// - new entries some duplicate, some conflict: remove the conflicting ones,\n//   add their replacement, without ever removing dups.\n//\n// Storage errors handled properly.\n// Commit index updated properly.\n\nfunc TestRaft_InstallSnapshot_InvalidPeers(t *testing.T) {\n\t_, transport := NewInmemTransport(\"\")\n\tr := &Raft{\n\t\ttrans:  transport,\n\t\tlogger: hclog.New(nil),\n\t}\n\n\treq := &InstallSnapshotRequest{\n\t\tPeers: []byte(\"invalid msgpack\"),\n\t}\n\tchResp := make(chan RPCResponse, 1)\n\trpc := RPC{\n\t\tReader:   new(bytes.Buffer),\n\t\tRespChan: chResp,\n\t}\n\tr.installSnapshot(rpc, req)\n\tresp := <-chResp\n\trequire.Error(t, resp.Error)\n\trequire.Contains(t, resp.Error.Error(), \"failed to decode peers\")\n}\n\nfunc TestRaft_VoteNotGranted_WhenNodeNotInCluster(t *testing.T) {\n\t// Make a 
cluster\n\tc := MakeCluster(3, t, nil)\n\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Wait until we have 2 followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// Remove a follower\n\tfollowerRemoved := followers[0]\n\tfuture := leader.RemoveServer(followerRemoved.localID, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// Wait a while\n\ttime.Sleep(c.propagateTimeout)\n\n\t// Other nodes should have fewer peers\n\tif configuration := c.getConfiguration(leader); len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers\")\n\t}\n\tif configuration := c.getConfiguration(followers[1]); len(configuration.Servers) != 2 {\n\t\tt.Fatalf(\"too many peers\")\n\t}\n\twaitForState(followerRemoved, Follower)\n\t// The removed node should still be in Follower state\n\trequire.Equal(t, Follower, followerRemoved.getState())\n\n\t// Prepare a Vote request from the removed follower\n\tfollower := followers[1]\n\tfollowerRemovedT := c.trans[c.IndexOf(followerRemoved)]\n\treqVote := RequestVoteRequest{\n\t\tRPCHeader:          followerRemoved.getRPCHeader(),\n\t\tTerm:               followerRemoved.getCurrentTerm() + 10,\n\t\tLastLogIndex:       followerRemoved.LastIndex(),\n\t\tLastLogTerm:        followerRemoved.getCurrentTerm(),\n\t\tLeadershipTransfer: false,\n\t}\n\t// holds the reply to the vote request sent by the removed follower below.\n\tvar resp RequestVoteResponse\n\n\t// partition the leader to simulate an unstable cluster\n\tc.Partition([]ServerAddress{leader.localAddr})\n\ttime.Sleep(c.propagateTimeout)\n\n\t// wait for the remaining follower to trigger an election\n\twaitForState(follower, Candidate)\n\n\t// send a vote request 
from the removed follower to the Candidate follower\n\tif err := followerRemovedT.RequestVote(follower.localID, follower.localAddr, &reqVote, &resp); err != nil {\n\t\tt.Fatalf(\"RequestVote RPC failed %v\", err)\n\t}\n\n\t// the vote request should not be granted, because the voter is not part of the cluster anymore\n\tif resp.Granted {\n\t\tt.Fatalf(\"expected vote to not be granted, but it was %+v\", resp)\n\t}\n}\n\nfunc TestRaft_ClusterCanRegainStability_WhenNonVoterWithHigherTermJoin(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(3, t, nil)\n\n\tdefer c.Close()\n\n\t// Get the leader\n\tleader := c.Leader()\n\n\t// Wait until we have 2 followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// Remove a follower\n\tfollowerRemoved := followers[0]\n\tc.Disconnect(followerRemoved.localAddr)\n\ttime.Sleep(c.propagateTimeout)\n\n\tfuture := leader.RemoveServer(followerRemoved.localID, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\t// set that follower term to higher term to faster simulate a partitioning\n\tnewTerm := leader.getCurrentTerm() + 20\n\tfollowerRemoved.setCurrentTerm(newTerm)\n\t// Add the node back as NonVoter\n\tfuture = leader.AddNonvoter(followerRemoved.localID, followerRemoved.localAddr, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\n\tc.FullyConnect()\n\n\t// Wait a while\n\ttime.Sleep(c.propagateTimeout)\n\t// Check the term is now a new term\n\tleader = c.Leader()\n\tcurrentTerm := leader.getCurrentTerm()\n\tif newTerm > currentTerm {\n\t\tt.Fatalf(\"term should have changed,%d < %d\", newTerm, currentTerm)\n\t}\n\n\t// check nonVoter is not elected\n\tif leader.localID == 
followerRemoved.localID {\n\t\tt.Fatalf(\"Should not be leader %s\", followerRemoved.localID)\n\t}\n\n\t// Write some logs to ensure they replicate\n\tfor i := 0; i < 100; i++ {\n\t\tfuture := leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t\tif err := future.Error(); err != nil {\n\t\t\tt.Fatalf(\"[ERR] apply err: %v\", err)\n\t\t}\n\t}\n\tc.WaitForReplication(100)\n\n\t// Remove the server and add it back as Voter\n\tfuture = leader.RemoveServer(followerRemoved.localID, 0, 0)\n\tif err := future.Error(); err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tleader.AddVoter(followerRemoved.localID, followerRemoved.localAddr, 0, 0)\n\n\t// Wait a while\n\ttime.Sleep(c.propagateTimeout * 10)\n\n\t// Write some logs to ensure they replicate\n\tfor i := 100; i < 200; i++ {\n\t\tfuture := leader.Apply([]byte(fmt.Sprintf(\"test%d\", i)), 0)\n\t\tif err := future.Error(); err != nil {\n\t\t\tt.Fatalf(\"[ERR] apply err: %v\", err)\n\t\t}\n\t}\n\tc.WaitForReplication(200)\n\n\t// Check leader stable\n\tnewLeader := c.Leader()\n\tif newLeader.leaderID != leader.leaderID {\n\t\tt.Fatalf(\"leader changed\")\n\t}\n}\n\n// TestRaft_FollowerRemovalNoElection ensures that a leader election is not\n// started when a standby is shut down and restarted.\nfunc TestRaft_FollowerRemovalNoElection(t *testing.T) {\n\t// Make a cluster\n\tinmemConf := inmemConfig(t)\n\tinmemConf.HeartbeatTimeout = 100 * time.Millisecond\n\tinmemConf.ElectionTimeout = 100 * time.Millisecond\n\tc := MakeCluster(3, t, inmemConf)\n\n\tdefer c.Close()\n\terr := waitForLeader(c)\n\trequire.NoError(t, err)\n\tleader := c.Leader()\n\n\t// Wait until we have 2 followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// Disconnect one of the 
followers and wait for the heartbeat timeout\n\ti := 0\n\tfollower := c.rafts[i]\n\tif follower == c.Leader() {\n\t\ti = 1\n\t\tfollower = c.rafts[i]\n\t}\n\tlogs := follower.logs\n\tt.Logf(\"[INFO] restarting %v\", follower)\n\t// Shutdown follower\n\tif f := follower.Shutdown(); f.Error() != nil {\n\t\tt.Fatalf(\"error shutting down follower: %v\", f.Error())\n\t}\n\n\t_, trans := NewInmemTransport(follower.localAddr)\n\tconf := follower.config()\n\tn, err := NewRaft(&conf, &MockFSM{}, logs, follower.stable, follower.snapshots, trans)\n\tif err != nil {\n\t\tt.Fatalf(\"error restarting follower: %v\", err)\n\t}\n\tc.rafts[i] = n\n\tc.trans[i] = n.trans.(*InmemTransport)\n\tc.fsms[i] = n.fsm.(*MockFSM)\n\tc.FullyConnect()\n\t// There should be no re-election during this sleep\n\ttime.Sleep(250 * time.Millisecond)\n\n\t// Let things settle and make sure we recovered.\n\tc.EnsureLeader(t, leader.localAddr)\n\tc.EnsureSame(t)\n\tc.EnsureSamePeers(t)\n\tn.Shutdown()\n}\n\nfunc waitForState(follower *Raft, state RaftState) {\n\tcount := 0\n\tfor follower.getState() != state && count < 1000 {\n\t\tcount++\n\t\ttime.Sleep(1 * time.Millisecond)\n\t}\n}\n\nfunc waitForLeader(c *cluster) error {\n\tcount := 0\n\tfor count < 100 {\n\t\tr := c.GetInState(Leader)\n\t\tif len(r) >= 1 {\n\t\t\treturn nil\n\t\t}\n\t\tcount++\n\t\ttime.Sleep(50 * time.Millisecond)\n\t}\n\treturn errors.New(\"no leader elected\")\n}\n\nfunc TestRaft_runFollower_State_Transition(t *testing.T) {\n\ttype fields struct {\n\t\tconf     *Config\n\t\tservers  []Server\n\t\tserverID ServerID\n\t}\n\ttests := []struct {\n\t\tname          string\n\t\tfields        fields\n\t\texpectedState RaftState\n\t}{\n\t\t{\"NonVoter\", fields{conf: DefaultConfig(), servers: []Server{{Nonvoter, \"first\", \"\"}}, serverID: \"first\"}, Follower},\n\t\t{\"Voter\", fields{conf: DefaultConfig(), servers: []Server{{Voter, \"first\", \"\"}}, serverID: \"first\"}, Candidate},\n\t\t{\"Not in Config\", fields{conf: 
DefaultConfig(), servers: []Server{{Voter, \"second\", \"\"}}, serverID: \"first\"}, Follower},\n\t}\n\tfor _, tt := range tests {\n\t\tt.Run(tt.name, func(t *testing.T) {\n\t\t\t// set timeout to tests specific\n\t\t\ttt.fields.conf.LocalID = tt.fields.serverID\n\t\t\ttt.fields.conf.HeartbeatTimeout = 50 * time.Millisecond\n\t\t\ttt.fields.conf.ElectionTimeout = 50 * time.Millisecond\n\t\t\ttt.fields.conf.LeaderLeaseTimeout = 50 * time.Millisecond\n\t\t\ttt.fields.conf.CommitTimeout = 5 * time.Millisecond\n\t\t\ttt.fields.conf.SnapshotThreshold = 100\n\t\t\ttt.fields.conf.TrailingLogs = 10\n\t\t\ttt.fields.conf.skipStartup = true\n\n\t\t\t// Create a raft instance and set the latest configuration\n\t\t\tenv1 := MakeRaft(t, tt.fields.conf, false)\n\t\t\tenv1.raft.setLatestConfiguration(Configuration{Servers: tt.fields.servers}, 1)\n\t\t\tenv1.raft.setState(Follower)\n\n\t\t\t// run the follower loop exclusively\n\t\t\tgo env1.raft.runFollower()\n\n\t\t\t// wait enough time to have HeartbeatTimeout\n\t\t\ttime.Sleep(tt.fields.conf.HeartbeatTimeout * 3)\n\n\t\t\t// Check the follower loop set the right state\n\t\t\trequire.Equal(t, tt.expectedState, env1.raft.getState())\n\t\t})\n\t}\n}\n\nfunc TestRaft_runFollower_ReloadTimeoutConfigs(t *testing.T) {\n\tconf := DefaultConfig()\n\tconf.LocalID = ServerID(\"first\")\n\tconf.HeartbeatTimeout = 500 * time.Millisecond\n\tconf.ElectionTimeout = 500 * time.Millisecond\n\tconf.LeaderLeaseTimeout = 50 * time.Millisecond\n\tconf.CommitTimeout = 5 * time.Millisecond\n\tconf.SnapshotThreshold = 100\n\tconf.TrailingLogs = 10\n\tconf.skipStartup = true\n\n\tenv := MakeRaft(t, conf, false)\n\tservers := []Server{{Voter, \"first\", \"\"}}\n\tenv.raft.setLatestConfiguration(Configuration{Servers: servers}, 1)\n\tenv.raft.setState(Follower)\n\n\t// run the follower loop exclusively\n\tgo env.raft.runFollower()\n\n\tnewCfg := ReloadableConfig{\n\t\tTrailingLogs:      conf.TrailingLogs,\n\t\tSnapshotInterval:  
conf.SnapshotInterval,\n\t\tSnapshotThreshold: conf.SnapshotThreshold,\n\t\tHeartbeatTimeout:  50 * time.Millisecond,\n\t\tElectionTimeout:   50 * time.Millisecond,\n\t}\n\trequire.NoError(t, env.raft.ReloadConfig(newCfg))\n\t// wait enough time to have HeartbeatTimeout\n\ttime.Sleep(3 * newCfg.HeartbeatTimeout)\n\n\t// Check the follower loop set the right state\n\trequire.Equal(t, Candidate, env.raft.getState())\n}\n\nfunc TestRaft_PreVote_ShouldNotRejectLeader(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\terr := waitForLeader(c)\n\trequire.NoError(t, err)\n\tleader := c.Leader()\n\n\t// Wait until we have 2 followers\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// A follower who thinks that x is the leader should not reject x's pre-vote\n\tfollower := followers[0]\n\trequire.Equal(t, leader.localAddr, follower.Leader())\n\n\treqPreVote := RequestPreVoteRequest{\n\t\tRPCHeader:    leader.getRPCHeader(),\n\t\tTerm:         leader.getCurrentTerm() + 1,\n\t\tLastLogIndex: leader.lastLogIndex,\n\t\tLastLogTerm:  leader.getCurrentTerm(),\n\t}\n\n\tvar resp RequestPreVoteResponse\n\tleaderT := c.trans[c.IndexOf(leader)]\n\tif err := leaderT.RequestPreVote(follower.localID, follower.localAddr, &reqPreVote, &resp); err != nil {\n\t\tt.Fatalf(\"RequestPreVote RPC failed %v\", err)\n\t}\n\n\t// the pre-vote should be granted\n\tif !resp.Granted {\n\t\tt.Fatalf(\"expected pre-vote to be granted, but it wasn't, %+v\", resp)\n\t}\n}\n\nfunc TestRaft_PreVote_ShouldRejectNonLeader(t *testing.T) {\n\t// Make a cluster\n\tc := MakeCluster(3, t, nil)\n\tdefer c.Close()\n\terr := waitForLeader(c)\n\trequire.NoError(t, err)\n\n\t// Wait until we have 2 followers\n\tlimit := 
time.Now().Add(c.longstopTimeout)\n\tvar followers []*Raft\n\tfor time.Now().Before(limit) && len(followers) != 2 {\n\t\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\t\tfollowers = c.GetInState(Follower)\n\t}\n\tif len(followers) != 2 {\n\t\tt.Fatalf(\"expected two followers: %v\", followers)\n\t}\n\n\t// A follower who thinks that x is the leader should reject another node's pre-vote request\n\tfollower := followers[0]\n\tanotherFollower := followers[1]\n\trequire.NotEqual(t, anotherFollower.localAddr, follower.Leader())\n\n\treqPreVote := RequestPreVoteRequest{\n\t\tRPCHeader:    anotherFollower.getRPCHeader(),\n\t\tTerm:         anotherFollower.getCurrentTerm() + 1,\n\t\tLastLogIndex: anotherFollower.lastLogIndex,\n\t\tLastLogTerm:  anotherFollower.getCurrentTerm(),\n\t}\n\n\tvar resp RequestPreVoteResponse\n\tanotherFollowerT := c.trans[c.IndexOf(anotherFollower)]\n\tif err := anotherFollowerT.RequestPreVote(follower.localID, follower.localAddr, &reqPreVote, &resp); err != nil {\n\t\tt.Fatalf(\"RequestPreVote RPC failed %v\", err)\n\t}\n\n\t// the pre-vote should not be granted\n\tif resp.Granted {\n\t\tt.Fatalf(\"expected pre-vote to not be granted, but it was granted, %+v\", resp)\n\t}\n}\n"
  },
  {
    "path": "replication.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"errors\"\n\t\"fmt\"\n\t\"sync\"\n\t\"sync/atomic\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-metrics/compat\"\n)\n\nconst (\n\tmaxFailureScale = 12\n\tfailureWait     = 10 * time.Millisecond\n)\n\nvar (\n\t// ErrLogNotFound indicates a given log entry is not available.\n\tErrLogNotFound = errors.New(\"log not found\")\n\n\t// ErrPipelineReplicationNotSupported can be returned by the transport to\n\t// signal that pipeline replication is not supported in general, and that\n\t// no error message should be produced.\n\tErrPipelineReplicationNotSupported = errors.New(\"pipeline replication not supported\")\n)\n\n// followerReplication is in charge of sending snapshots and log entries from\n// this leader during this particular term to a remote follower.\ntype followerReplication struct {\n\t// currentTerm and nextIndex must be kept at the top of the struct so\n\t// they're 64 bit aligned which is a requirement for atomic ops on 32 bit\n\t// platforms.\n\n\t// currentTerm is the term of this leader, to be included in AppendEntries\n\t// requests.\n\tcurrentTerm uint64\n\n\t// nextIndex is the index of the next log entry to send to the follower,\n\t// which may fall past the end of the log.\n\tnextIndex uint64\n\n\t// peer contains the network address and ID of the remote follower.\n\tpeer Server\n\t// peerLock protects 'peer'\n\tpeerLock sync.RWMutex\n\n\t// commitment tracks the entries acknowledged by followers so that the\n\t// leader's commit index can advance. It is updated on successful\n\t// AppendEntries responses.\n\tcommitment *commitment\n\n\t// stopCh is notified/closed when this leader steps down or the follower is\n\t// removed from the cluster. 
In the follower removed case, it carries a log\n\t// index; replication should be attempted with a best effort up through that\n\t// index, before exiting.\n\tstopCh chan uint64\n\n\t// triggerCh is notified every time new entries are appended to the log.\n\ttriggerCh chan struct{}\n\n\t// triggerDeferErrorCh is used to provide a backchannel. By sending a\n\t// deferErr, the sender can be notified when the replication is done.\n\ttriggerDeferErrorCh chan *deferError\n\n\t// lastContact is updated to the current time whenever any response is\n\t// received from the follower (successful or not). This is used to check\n\t// whether the leader should step down (Raft.checkLeaderLease()).\n\tlastContact time.Time\n\t// lastContactLock protects 'lastContact'.\n\tlastContactLock sync.RWMutex\n\n\t// failures counts the number of failed RPCs since the last success, which is\n\t// used to apply backoff.\n\tfailures uint64\n\n\t// notifyCh is notified to send out a heartbeat, which is used to check that\n\t// this server is still leader.\n\tnotifyCh chan struct{}\n\t// notify is a map of futures to be resolved upon receipt of an\n\t// acknowledgement, then cleared from this map.\n\tnotify map[*verifyFuture]struct{}\n\t// notifyLock protects 'notify'.\n\tnotifyLock sync.Mutex\n\n\t// stepDown is used to indicate to the leader that we\n\t// should step down based on information from a follower.\n\tstepDown chan struct{}\n\n\t// allowPipeline is used to determine when to pipeline the AppendEntries RPCs.\n\t// It is private to this replication goroutine.\n\tallowPipeline bool\n}\n\n// notifyAll is used to notify all the waiting verify futures\n// if the follower believes we are still the leader.\nfunc (s *followerReplication) notifyAll(leader bool) {\n\t// Clear the waiting notifies minimizing lock time\n\ts.notifyLock.Lock()\n\tn := s.notify\n\ts.notify = make(map[*verifyFuture]struct{})\n\ts.notifyLock.Unlock()\n\n\t// Submit our votes\n\tfor v := range n 
{\n\t\tv.vote(leader)\n\t}\n}\n\n// cleanNotify is used to delete a pending verify future from notify.\nfunc (s *followerReplication) cleanNotify(v *verifyFuture) {\n\ts.notifyLock.Lock()\n\tdelete(s.notify, v)\n\ts.notifyLock.Unlock()\n}\n\n// LastContact returns the time of last contact.\nfunc (s *followerReplication) LastContact() time.Time {\n\ts.lastContactLock.RLock()\n\tlast := s.lastContact\n\ts.lastContactLock.RUnlock()\n\treturn last\n}\n\n// setLastContact sets the last contact to the current time.\nfunc (s *followerReplication) setLastContact() {\n\ts.lastContactLock.Lock()\n\ts.lastContact = time.Now()\n\ts.lastContactLock.Unlock()\n}\n\n// replicate is a long running routine that replicates log entries to a single\n// follower.\nfunc (r *Raft) replicate(s *followerReplication) {\n\t// Start an async heartbeating routine\n\tstopHeartbeat := make(chan struct{})\n\tdefer close(stopHeartbeat)\n\tr.goFunc(func() { r.heartbeat(s, stopHeartbeat) })\n\nRPC:\n\tshouldStop := false\n\tfor !shouldStop {\n\t\tselect {\n\t\tcase maxIndex := <-s.stopCh:\n\t\t\t// Make a best effort to replicate up to this index\n\t\t\tif maxIndex > 0 {\n\t\t\t\tr.replicateTo(s, maxIndex)\n\t\t\t}\n\t\t\treturn\n\t\tcase deferErr := <-s.triggerDeferErrorCh:\n\t\t\tlastLogIdx, _ := r.getLastLog()\n\t\t\tshouldStop = r.replicateTo(s, lastLogIdx)\n\t\t\tif !shouldStop {\n\t\t\t\tdeferErr.respond(nil)\n\t\t\t} else {\n\t\t\t\tdeferErr.respond(fmt.Errorf(\"replication failed\"))\n\t\t\t}\n\t\tcase <-s.triggerCh:\n\t\t\tlastLogIdx, _ := r.getLastLog()\n\t\t\tshouldStop = r.replicateTo(s, lastLogIdx)\n\t\t// This is _not_ our heartbeat mechanism but is to ensure\n\t\t// followers quickly learn the leader's commit index when\n\t\t// raft commits stop flowing naturally. The actual heartbeats\n\t\t// can't do this to keep them unblocked by disk IO on the\n\t\t// follower. 
See https://github.com/hashicorp/raft/issues/282.\n\t\tcase <-randomTimeout(r.config().CommitTimeout):\n\t\t\tlastLogIdx, _ := r.getLastLog()\n\t\t\tshouldStop = r.replicateTo(s, lastLogIdx)\n\t\t}\n\n\t\t// If things look healthy, switch to pipeline mode\n\t\tif !shouldStop && s.allowPipeline {\n\t\t\tgoto PIPELINE\n\t\t}\n\t}\n\treturn\n\nPIPELINE:\n\t// Disable until re-enabled\n\ts.allowPipeline = false\n\n\t// Replicates using a pipeline for high performance. This method\n\t// is not able to gracefully recover from errors, and so we fall back\n\t// to standard mode on failure.\n\tif err := r.pipelineReplicate(s); err != nil {\n\t\tif err != ErrPipelineReplicationNotSupported {\n\t\t\ts.peerLock.RLock()\n\t\t\tpeer := s.peer\n\t\t\ts.peerLock.RUnlock()\n\t\t\tr.logger.Error(\"failed to start pipeline replication to\", \"peer\", peer, \"error\", err)\n\t\t}\n\t}\n\tgoto RPC\n}\n\n// replicateTo is a helper to replicate(), used to replicate the logs up to a\n// given last index.\n// If the follower log is behind, we take care to bring them up to date.\nfunc (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {\n\t// Create the base request\n\tvar req AppendEntriesRequest\n\tvar resp AppendEntriesResponse\n\tvar start time.Time\n\tvar peer Server\n\nSTART:\n\t// Prevent an excessive retry rate on errors\n\tif s.failures > 0 {\n\t\tselect {\n\t\tcase <-time.After(backoff(failureWait, s.failures, maxFailureScale)):\n\t\tcase <-r.shutdownCh:\n\t\t}\n\t}\n\n\ts.peerLock.RLock()\n\tpeer = s.peer\n\ts.peerLock.RUnlock()\n\n\t// Setup the request\n\tif err := r.setupAppendEntries(s, &req, atomic.LoadUint64(&s.nextIndex), lastIndex); err == ErrLogNotFound {\n\t\tgoto SEND_SNAP\n\t} else if err != nil {\n\t\treturn\n\t}\n\n\t// Make the RPC call\n\tstart = time.Now()\n\tif err := r.trans.AppendEntries(peer.ID, peer.Address, &req, &resp); err != nil {\n\t\tr.logger.Error(\"failed to appendEntries to\", \"peer\", peer, \"error\", 
err)\n\t\ts.failures++\n\t\treturn\n\t}\n\tappendStats(string(peer.ID), start, float32(len(req.Entries)), r.noLegacyTelemetry)\n\n\t// Check for a newer term, stop running\n\tif resp.Term > req.Term {\n\t\tr.handleStaleTerm(s)\n\t\treturn true\n\t}\n\n\t// Update the last contact\n\ts.setLastContact()\n\n\t// Update s based on success\n\tif resp.Success {\n\t\t// Update our replication state\n\t\tupdateLastAppended(s, &req)\n\n\t\t// Clear any failures, allow pipelining\n\t\ts.failures = 0\n\t\ts.allowPipeline = true\n\t} else {\n\t\tatomic.StoreUint64(&s.nextIndex, max(min(s.nextIndex-1, resp.LastLog+1), 1))\n\t\tif resp.NoRetryBackoff {\n\t\t\ts.failures = 0\n\t\t} else {\n\t\t\ts.failures++\n\t\t}\n\t\tr.logger.Warn(\"appendEntries rejected, sending older logs\", \"peer\", peer, \"next\", atomic.LoadUint64(&s.nextIndex))\n\t}\n\nCHECK_MORE:\n\t// Poll the stop channel here in case we are looping and have been asked\n\t// to stop, or have stepped down as leader. Even for the best effort case\n\t// where we are asked to replicate to a given index and then shutdown,\n\t// it's better to not loop in here to send lots of entries to a straggler\n\t// that's leaving the cluster anyways.\n\tselect {\n\tcase <-s.stopCh:\n\t\treturn true\n\tdefault:\n\t}\n\n\t// Check if there are more logs to replicate\n\tif atomic.LoadUint64(&s.nextIndex) <= lastIndex {\n\t\tgoto START\n\t}\n\treturn\n\n\t// SEND_SNAP is used when we fail to get a log, usually because the follower\n\t// is too far behind, and we must ship a snapshot down instead\nSEND_SNAP:\n\tif stop, err := r.sendLatestSnapshot(s); stop {\n\t\treturn true\n\t} else if err != nil {\n\t\tr.logger.Error(\"failed to send snapshot to\", \"peer\", peer, \"error\", err)\n\t\treturn\n\t}\n\n\t// Check if there is more to replicate\n\tgoto CHECK_MORE\n}\n\n// sendLatestSnapshot is used to send the latest snapshot we have\n// down to our follower.\nfunc (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {\n\t// 
Get the snapshots\n\tsnapshots, err := r.snapshots.List()\n\tif err != nil {\n\t\tr.logger.Error(\"failed to list snapshots\", \"error\", err)\n\t\treturn false, err\n\t}\n\n\t// Check we have at least a single snapshot\n\tif len(snapshots) == 0 {\n\t\treturn false, fmt.Errorf(\"no snapshots found\")\n\t}\n\n\t// Open the most recent snapshot\n\tsnapID := snapshots[0].ID\n\tr.logger.Info(\"opening snapshot\", \"id\", snapID)\n\tmeta, snapshot, err := r.snapshots.Open(snapID)\n\tif err != nil {\n\t\tr.logger.Error(\"failed to open snapshot\", \"id\", snapID, \"error\", err)\n\t\treturn false, err\n\t}\n\tdefer func() { _ = snapshot.Close() }()\n\n\t// Setup the request\n\treq := InstallSnapshotRequest{\n\t\tRPCHeader:       r.getRPCHeader(),\n\t\tSnapshotVersion: meta.Version,\n\t\tTerm:            s.currentTerm,\n\t\t// this is needed for retro compatibility, before RPCHeader.Addr was added\n\t\tLeader:             r.trans.EncodePeer(r.localID, r.localAddr),\n\t\tLastLogIndex:       meta.Index,\n\t\tLastLogTerm:        meta.Term,\n\t\tPeers:              meta.Peers,\n\t\tSize:               meta.Size,\n\t\tConfiguration:      EncodeConfiguration(meta.Configuration),\n\t\tConfigurationIndex: meta.ConfigurationIndex,\n\t}\n\n\ts.peerLock.RLock()\n\tpeer := s.peer\n\ts.peerLock.RUnlock()\n\n\tr.logger.Info(\"installing snapshot on\", \"peer\", peer.ID, \"id\", snapID, \"size\", req.Size)\n\t// Make the call\n\tstart := time.Now()\n\tvar resp InstallSnapshotResponse\n\tif err := r.trans.InstallSnapshot(peer.ID, peer.Address, &req, &resp, snapshot); err != nil {\n\t\tr.logger.Error(\"failed to install snapshot\", \"peer\", peer.ID, \"id\", snapID, \"error\", err)\n\t\ts.failures++\n\t\treturn false, err\n\t}\n\tlabels := []metrics.Label{{Name: \"peer_id\", Value: string(peer.ID)}}\n\tmetrics.MeasureSinceWithLabels([]string{\"raft\", \"replication\", \"installSnapshot\"}, start, labels)\n\n\tif !r.noLegacyTelemetry {\n\t\t// Duplicated information. 
Kept for backward compatibility.\n\t\tmetrics.MeasureSince([]string{\"raft\", \"replication\", \"installSnapshot\", string(peer.ID)}, start)\n\t}\n\n\t// Check for a newer term, stop running\n\tif resp.Term > req.Term {\n\t\tr.handleStaleTerm(s)\n\t\treturn true, nil\n\t}\n\n\t// Update the last contact\n\ts.setLastContact()\n\n\t// Check for success\n\tif resp.Success {\n\t\t// Update the indexes\n\t\tatomic.StoreUint64(&s.nextIndex, meta.Index+1)\n\t\ts.commitment.match(peer.ID, meta.Index)\n\n\t\t// Clear any failures\n\t\ts.failures = 0\n\n\t\t// Notify we are still leader\n\t\ts.notifyAll(true)\n\t} else {\n\t\ts.failures++\n\t\tr.logger.Warn(\"installSnapshot rejected to\", \"peer\", peer.ID, \"id\", snapID)\n\t}\n\treturn false, nil\n}\n\n// heartbeat is used to periodically invoke AppendEntries on a peer\n// to ensure they don't time out. This is done async of replicate(),\n// since that routine could potentially be blocked on disk IO.\nfunc (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {\n\tvar failures uint64\n\treq := AppendEntriesRequest{\n\t\tRPCHeader: r.getRPCHeader(),\n\t\tTerm:      s.currentTerm,\n\t\t// this is needed for retro compatibility, before RPCHeader.Addr was added\n\t\tLeader: r.trans.EncodePeer(r.localID, r.localAddr),\n\t}\n\n\tvar resp AppendEntriesResponse\n\tfor {\n\t\t// Wait for the next heartbeat interval or forced notify\n\t\tselect {\n\t\tcase <-s.notifyCh:\n\t\tcase <-randomTimeout(r.config().HeartbeatTimeout / 10):\n\t\tcase <-stopCh:\n\t\t\treturn\n\t\t}\n\n\t\ts.peerLock.RLock()\n\t\tpeer := s.peer\n\t\ts.peerLock.RUnlock()\n\n\t\tstart := time.Now()\n\t\tif err := r.trans.AppendEntries(peer.ID, peer.Address, &req, &resp); err != nil {\n\t\t\tnextBackoffTime := cappedExponentialBackoff(failureWait, failures, maxFailureScale, r.config().HeartbeatTimeout/2)\n\t\t\tr.logger.Error(\"failed to heartbeat to\", \"peer\", peer.Address, \"backoff time\",\n\t\t\t\tnextBackoffTime, \"error\", 
err)\n\t\t\tr.observe(FailedHeartbeatObservation{PeerID: peer.ID, LastContact: s.LastContact()})\n\t\t\tfailures++\n\t\t\tselect {\n\t\t\tcase <-time.After(nextBackoffTime):\n\t\t\tcase <-stopCh:\n\t\t\t\treturn\n\t\t\t}\n\t\t} else {\n\t\t\tif failures > 0 {\n\t\t\t\tr.observe(ResumedHeartbeatObservation{PeerID: peer.ID})\n\t\t\t}\n\t\t\ts.setLastContact()\n\t\t\tfailures = 0\n\t\t\tlabels := []metrics.Label{{Name: \"peer_id\", Value: string(peer.ID)}}\n\t\t\tmetrics.MeasureSinceWithLabels([]string{\"raft\", \"replication\", \"heartbeat\"}, start, labels)\n\n\t\t\tif !r.noLegacyTelemetry {\n\t\t\t\t// Duplicated information. Kept for backward compatibility.\n\t\t\t\tmetrics.MeasureSince([]string{\"raft\", \"replication\", \"heartbeat\", string(peer.ID)}, start)\n\t\t\t}\n\n\t\t\ts.notifyAll(resp.Success)\n\t\t}\n\t}\n}\n\n// pipelineReplicate is used when we have synchronized our state with the follower,\n// and want to switch to a higher performance pipeline mode of replication.\n// We only pipeline AppendEntries commands, and if we ever hit an error, we fall\n// back to the standard replication which can handle more complex situations.\nfunc (r *Raft) pipelineReplicate(s *followerReplication) error {\n\ts.peerLock.RLock()\n\tpeer := s.peer\n\ts.peerLock.RUnlock()\n\n\t// Create a new pipeline\n\tpipeline, err := r.trans.AppendEntriesPipeline(peer.ID, peer.Address)\n\tif err != nil {\n\t\treturn err\n\t}\n\tdefer func() { _ = pipeline.Close() }()\n\n\t// Log start and stop of pipeline\n\tr.logger.Info(\"pipelining replication\", \"peer\", peer)\n\tdefer r.logger.Info(\"aborting pipeline replication\", \"peer\", peer)\n\n\t// Create a shutdown and finish channel\n\tstopCh := make(chan struct{})\n\tfinishCh := make(chan struct{})\n\n\t// Start a dedicated decoder\n\tr.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })\n\n\t// Start pipeline sends at the last good nextIndex\n\tnextIndex := atomic.LoadUint64(&s.nextIndex)\n\n\tshouldStop := 
false\nSEND:\n\tfor !shouldStop {\n\t\tselect {\n\t\tcase <-finishCh:\n\t\t\tbreak SEND\n\t\tcase maxIndex := <-s.stopCh:\n\t\t\t// Make a best effort to replicate up to this index\n\t\t\tif maxIndex > 0 {\n\t\t\t\tr.pipelineSend(s, pipeline, &nextIndex, maxIndex)\n\t\t\t}\n\t\t\tbreak SEND\n\t\tcase deferErr := <-s.triggerDeferErrorCh:\n\t\t\tlastLogIdx, _ := r.getLastLog()\n\t\t\tshouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)\n\t\t\tif !shouldStop {\n\t\t\t\tdeferErr.respond(nil)\n\t\t\t} else {\n\t\t\t\tdeferErr.respond(fmt.Errorf(\"replication failed\"))\n\t\t\t}\n\t\tcase <-s.triggerCh:\n\t\t\tlastLogIdx, _ := r.getLastLog()\n\t\t\tshouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)\n\t\tcase <-randomTimeout(r.config().CommitTimeout):\n\t\t\tlastLogIdx, _ := r.getLastLog()\n\t\t\tshouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)\n\t\t}\n\t}\n\n\t// Stop our decoder, and wait for it to finish\n\tclose(stopCh)\n\tselect {\n\tcase <-finishCh:\n\tcase <-r.shutdownCh:\n\t}\n\treturn nil\n}\n\n// pipelineSend is used to send data over a pipeline. 
It is a helper to\n// pipelineReplicate.\nfunc (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {\n\t// Create a new append request\n\treq := new(AppendEntriesRequest)\n\tif err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {\n\t\treturn true\n\t}\n\n\t// Pipeline the append entries\n\tif _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {\n\t\tr.logger.Error(\"failed to pipeline appendEntries\", \"peer\", s.peer, \"error\", err)\n\t\treturn true\n\t}\n\n\t// Increase the next send log to avoid re-sending old logs\n\tif n := len(req.Entries); n > 0 {\n\t\tlast := req.Entries[n-1]\n\t\tatomic.StoreUint64(nextIdx, last.Index+1)\n\t}\n\treturn false\n}\n\n// pipelineDecode is used to decode the responses of pipelined requests.\nfunc (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {\n\tdefer close(finishCh)\n\trespCh := p.Consumer()\n\tfor {\n\t\tselect {\n\t\tcase ready := <-respCh:\n\t\t\ts.peerLock.RLock()\n\t\t\tpeer := s.peer\n\t\t\ts.peerLock.RUnlock()\n\n\t\t\treq, resp := ready.Request(), ready.Response()\n\t\t\tappendStats(string(peer.ID), ready.Start(), float32(len(req.Entries)), r.noLegacyTelemetry)\n\n\t\t\t// Check for a newer term, stop running\n\t\t\tif resp.Term > req.Term {\n\t\t\t\tr.handleStaleTerm(s)\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\t// Update the last contact\n\t\t\ts.setLastContact()\n\n\t\t\t// Abort pipeline if not successful\n\t\t\tif !resp.Success {\n\t\t\t\treturn\n\t\t\t}\n\n\t\t\t// Update our replication state\n\t\t\tupdateLastAppended(s, req)\n\t\tcase <-stopCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// setupAppendEntries is used to setup an append entries request.\nfunc (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {\n\treq.RPCHeader = r.getRPCHeader()\n\treq.Term = s.currentTerm\n\t// this is needed for retro 
compatibility, before RPCHeader.Addr was added\n\treq.Leader = r.trans.EncodePeer(r.localID, r.localAddr)\n\treq.LeaderCommitIndex = r.getCommitIndex()\n\tif err := r.setPreviousLog(req, nextIndex); err != nil {\n\t\treturn err\n\t}\n\tif err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {\n\t\treturn err\n\t}\n\treturn nil\n}\n\n// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an\n// AppendEntriesRequest given the next index to replicate.\nfunc (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {\n\t// Guard for the first index, since there is no 0 log entry\n\t// Guard against the previous index being a snapshot as well\n\tlastSnapIdx, lastSnapTerm := r.getLastSnapshot()\n\tif nextIndex == 1 {\n\t\treq.PrevLogEntry = 0\n\t\treq.PrevLogTerm = 0\n\n\t} else if (nextIndex - 1) == lastSnapIdx {\n\t\treq.PrevLogEntry = lastSnapIdx\n\t\treq.PrevLogTerm = lastSnapTerm\n\n\t} else {\n\t\tvar l Log\n\t\tif err := r.logs.GetLog(nextIndex-1, &l); err != nil {\n\t\t\tr.logger.Error(\"failed to get log\", \"index\", nextIndex-1, \"error\", err)\n\t\t\treturn err\n\t\t}\n\n\t\t// Set the previous index and term (0 if nextIndex is 1)\n\t\treq.PrevLogEntry = l.Index\n\t\treq.PrevLogTerm = l.Term\n\t}\n\treturn nil\n}\n\n// setNewLogs is used to setup the logs which should be appended for a request.\nfunc (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {\n\t// Append up to MaxAppendEntries or up to the lastIndex. 
we need to use a\n\t// consistent value for maxAppendEntries in the lines below in case it ever\n\t// becomes reloadable.\n\tmaxAppendEntries := r.config().MaxAppendEntries\n\treq.Entries = make([]*Log, 0, maxAppendEntries)\n\tmaxIndex := min(nextIndex+uint64(maxAppendEntries)-1, lastIndex)\n\tfor i := nextIndex; i <= maxIndex; i++ {\n\t\toldLog := new(Log)\n\t\tif err := r.logs.GetLog(i, oldLog); err != nil {\n\t\t\tr.logger.Error(\"failed to get log\", \"index\", i, \"error\", err)\n\t\t\treturn err\n\t\t}\n\t\treq.Entries = append(req.Entries, oldLog)\n\t}\n\treturn nil\n}\n\n// appendStats is used to emit stats about an AppendEntries invocation.\nfunc appendStats(peer string, start time.Time, logs float32, skipLegacy bool) {\n\tlabels := []metrics.Label{{Name: \"peer_id\", Value: peer}}\n\tmetrics.MeasureSinceWithLabels([]string{\"raft\", \"replication\", \"appendEntries\", \"rpc\"}, start, labels)\n\tmetrics.IncrCounterWithLabels([]string{\"raft\", \"replication\", \"appendEntries\", \"logs\"}, logs, labels)\n\n\tif !skipLegacy {\n\t\t// Duplicated information. 
Kept for backward compatibility.\n\t\tmetrics.MeasureSince([]string{\"raft\", \"replication\", \"appendEntries\", \"rpc\", peer}, start)\n\t\tmetrics.IncrCounter([]string{\"raft\", \"replication\", \"appendEntries\", \"logs\", peer}, logs)\n\t}\n}\n\n// handleStaleTerm is used when a follower indicates that we have a stale term.\nfunc (r *Raft) handleStaleTerm(s *followerReplication) {\n\tr.logger.Error(\"peer has newer term, stopping replication\", \"peer\", s.peer)\n\ts.notifyAll(false) // No longer leader\n\tasyncNotifyCh(s.stepDown)\n}\n\n// updateLastAppended is used to update follower replication state after a\n// successful AppendEntries RPC.\n// TODO: This isn't used during InstallSnapshot, but the code there is similar.\nfunc updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {\n\t// Mark any inflight logs as committed\n\tif logs := req.Entries; len(logs) > 0 {\n\t\tlast := logs[len(logs)-1]\n\t\tatomic.StoreUint64(&s.nextIndex, last.Index+1)\n\t\ts.commitment.match(s.peer.ID, last.Index)\n\t}\n\n\t// Notify still leader\n\ts.notifyAll(true)\n}\n"
  },
  {
    "path": "saturation.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"math\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-metrics/compat\"\n)\n\n// saturationMetric measures the saturation (percentage of time spent working vs\n// waiting for work) of an event processing loop, such as runFSM. It reports the\n// saturation as a gauge metric (at most) once every reportInterval.\n//\n// Callers must instrument their loop with calls to sleeping and working, starting\n// with a call to sleeping.\n//\n// Note: the caller must be single-threaded and saturationMetric is not safe for\n// concurrent use by multiple goroutines.\ntype saturationMetric struct {\n\treportInterval time.Duration\n\n\t// slept contains time for which the event processing loop was sleeping rather\n\t// than working in the period since lastReport.\n\tslept time.Duration\n\n\t// lost contains time that is considered lost due to incorrect use of\n\t// saturationMetricBucket (e.g. calling sleeping() or working() multiple\n\t// times in succession) in the period since lastReport.\n\tlost time.Duration\n\n\tlastReport, sleepBegan, workBegan time.Time\n\n\t// These are overwritten in tests.\n\tnowFn    func() time.Time\n\treportFn func(float32)\n}\n\n// newSaturationMetric creates a saturationMetric that will update the gauge\n// with the given name at the given reportInterval. keepPrev determines the\n// number of previous measurements that will be used to smooth out spikes.\nfunc newSaturationMetric(name []string, reportInterval time.Duration) *saturationMetric {\n\tm := &saturationMetric{\n\t\treportInterval: reportInterval,\n\t\tnowFn:          time.Now,\n\t\tlastReport:     time.Now(),\n\t\treportFn:       func(sat float32) { metrics.AddSample(name, sat) },\n\t}\n\treturn m\n}\n\n// sleeping records the time at which the loop began waiting for work. 
After the\n// initial call it must always be preceded by a call to working.\nfunc (s *saturationMetric) sleeping() {\n\tnow := s.nowFn()\n\n\tif !s.sleepBegan.IsZero() {\n\t\t// sleeping called twice in succession. Count that time as lost rather than\n\t\t// measuring nonsense.\n\t\ts.lost += now.Sub(s.sleepBegan)\n\t}\n\n\ts.sleepBegan = now\n\ts.workBegan = time.Time{}\n\ts.report()\n}\n\n// working records the time at which the loop began working. It must always be\n// preceded by a call to sleeping.\nfunc (s *saturationMetric) working() {\n\tnow := s.nowFn()\n\n\tif s.workBegan.IsZero() {\n\t\tif s.sleepBegan.IsZero() {\n\t\t\t// working called before the initial call to sleeping. Count that time as\n\t\t\t// lost rather than measuring nonsense.\n\t\t\ts.lost += now.Sub(s.lastReport)\n\t\t} else {\n\t\t\ts.slept += now.Sub(s.sleepBegan)\n\t\t}\n\t} else {\n\t\t// working called twice in succession. Count that time as lost rather than\n\t\t// measuring nonsense.\n\t\ts.lost += now.Sub(s.workBegan)\n\t}\n\n\ts.workBegan = now\n\ts.sleepBegan = time.Time{}\n\ts.report()\n}\n\n// report updates the gauge if reportInterval has passed since our last report.\nfunc (s *saturationMetric) report() {\n\tnow := s.nowFn()\n\ttimeSinceLastReport := now.Sub(s.lastReport)\n\n\tif timeSinceLastReport < s.reportInterval {\n\t\treturn\n\t}\n\n\tvar saturation float64\n\ttotal := timeSinceLastReport - s.lost\n\tif total != 0 {\n\t\tsaturation = float64(total-s.slept) / float64(total)\n\t\tsaturation = math.Round(saturation*100) / 100\n\t}\n\ts.reportFn(float32(saturation))\n\n\ts.slept = 0\n\ts.lost = 0\n\ts.lastReport = now\n}\n"
  },
  {
    "path": "saturation_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/stretchr/testify/require\"\n)\n\nfunc TestSaturationMetric(t *testing.T) {\n\tt.Run(\"without smoothing\", func(t *testing.T) {\n\t\tsat := newSaturationMetric([]string{\"metric\"}, 100*time.Millisecond)\n\n\t\tnow := sat.lastReport\n\t\tsat.nowFn = func() time.Time { return now }\n\n\t\tvar reported float32\n\t\tsat.reportFn = func(val float32) { reported = val }\n\n\t\tsat.sleeping()\n\n\t\t// First window: 50ms sleeping + 75ms working.\n\t\tnow = now.Add(50 * time.Millisecond)\n\t\tsat.working()\n\n\t\tnow = now.Add(75 * time.Millisecond)\n\t\tsat.sleeping()\n\n\t\t// Should be 60% saturation.\n\t\trequire.Equal(t, float32(0.6), reported)\n\n\t\t// Second window: 90ms sleeping + 10ms working.\n\t\tnow = now.Add(90 * time.Millisecond)\n\t\tsat.working()\n\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.sleeping()\n\n\t\t// Should be 10% saturation.\n\t\trequire.Equal(t, float32(0.1), reported)\n\n\t\t// Third window: 100ms sleeping + 0ms working.\n\t\tnow = now.Add(100 * time.Millisecond)\n\t\tsat.working()\n\n\t\t// Should be 0% saturation.\n\t\trequire.Equal(t, float32(0), reported)\n\t})\n}\n\nfunc TestSaturationMetric_IncorrectUsage(t *testing.T) {\n\tt.Run(\"calling sleeping() consecutively\", func(t *testing.T) {\n\t\tsat := newSaturationMetric([]string{\"metric\"}, 50*time.Millisecond)\n\n\t\tnow := sat.lastReport\n\t\tsat.nowFn = func() time.Time { return now }\n\n\t\tvar reported float32\n\t\tsat.reportFn = func(v float32) { reported = v }\n\n\t\t// Calling sleeping() consecutively should reset sleepBegan without recording\n\t\t// a sample, such that we \"lose\" time rather than recording nonsense data.\n\t\t//\n\t\t//   0   | sleeping() |\n\t\t//                     => Sleeping (10ms)\n\t\t// +10ms |  working() |\n\t\t//                     => Working  (10ms)\n\t\t// +20ms | sleeping() 
|\n\t\t//                     => [!] LOST [!] (10ms)\n\t\t// +30ms | sleeping() |\n\t\t//                     => Sleeping (10ms)\n\t\t// +40ms |  working() |\n\t\t//                     => Working (10ms)\n\t\t// +50ms | sleeping() |\n\t\t//\n\t\t// Total reportable time: 40ms. Saturation: 50%.\n\t\tsat.sleeping()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.working()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.sleeping()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.sleeping()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.working()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.sleeping()\n\n\t\trequire.Equal(t, float32(0.5), reported)\n\t})\n\n\tt.Run(\"calling working() consecutively\", func(t *testing.T) {\n\t\tsat := newSaturationMetric([]string{\"metric\"}, 30*time.Millisecond)\n\n\t\tnow := sat.lastReport\n\t\tsat.nowFn = func() time.Time { return now }\n\n\t\tvar reported float32\n\t\tsat.reportFn = func(v float32) { reported = v }\n\n\t\t// Calling working() consecutively should reset workBegan without recording\n\t\t// a sample, such that we \"lose\" time rather than recording nonsense data.\n\t\t//\n\t\t//   0   | sleeping() |\n\t\t//                     => Sleeping (10ms)\n\t\t// +10ms |  working() |\n\t\t//                     => [!] LOST [!] (10ms)\n\t\t// +20ms |  working() |\n\t\t//                     => Working (10ms)\n\t\t// +30ms | sleeping() |\n\t\t//\n\t\t// Total reportable time: 20ms. 
Saturation: 50%.\n\t\tsat.sleeping()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.working()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.working()\n\t\tnow = now.Add(10 * time.Millisecond)\n\t\tsat.sleeping()\n\n\t\trequire.Equal(t, float32(0.5), reported)\n\t})\n\n\tt.Run(\"calling working() first\", func(t *testing.T) {\n\t\tsat := newSaturationMetric([]string{\"metric\"}, 10*time.Millisecond)\n\n\t\tnow := sat.lastReport\n\t\tsat.nowFn = func() time.Time { return now }\n\n\t\tvar reported float32\n\t\tsat.reportFn = func(v float32) { reported = v }\n\n\t\t// Time from start until working() is treated as lost.\n\t\tsat.working()\n\t\trequire.Equal(t, float32(0), reported)\n\n\t\tsat.sleeping()\n\t\tnow = now.Add(5 * time.Millisecond)\n\t\tsat.working()\n\t\tnow = now.Add(5 * time.Millisecond)\n\t\tsat.sleeping()\n\t\trequire.Equal(t, float32(0.5), reported)\n\t})\n}\n"
  },
  {
    "path": "snapshot.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"fmt\"\n\t\"io\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-metrics/compat\"\n)\n\n// SnapshotMeta is for metadata of a snapshot.\ntype SnapshotMeta struct {\n\t// Version is the version number of the snapshot metadata. This does not cover\n\t// the application's data in the snapshot, that should be versioned\n\t// separately.\n\tVersion SnapshotVersion\n\n\t// ID is opaque to the store, and is used for opening.\n\tID string\n\n\t// Index and Term store when the snapshot was taken.\n\tIndex uint64\n\tTerm  uint64\n\n\t// Peers is deprecated and used to support version 0 snapshots, but will\n\t// be populated in version 1 snapshots as well to help with upgrades.\n\tPeers []byte\n\n\t// Configuration and ConfigurationIndex are present in version 1\n\t// snapshots and later.\n\tConfiguration      Configuration\n\tConfigurationIndex uint64\n\n\t// Size is the size of the snapshot in bytes.\n\tSize int64\n}\n\n// SnapshotStore interface is used to allow for flexible implementations\n// of snapshot storage and retrieval. For example, a client could implement\n// a shared state store such as S3, allowing new nodes to restore snapshots\n// without streaming from the leader.\ntype SnapshotStore interface {\n\t// Create is used to begin a snapshot at a given index and term, and with\n\t// the given committed configuration. The version parameter controls\n\t// which snapshot version to create.\n\tCreate(version SnapshotVersion, index, term uint64, configuration Configuration,\n\t\tconfigurationIndex uint64, trans Transport) (SnapshotSink, error)\n\n\t// List is used to list the available snapshots in the store.\n\t// It should return then in descending order, with the highest index first.\n\tList() ([]*SnapshotMeta, error)\n\n\t// Open takes a snapshot ID and provides a ReadCloser. 
Once close is\n\t// called it is assumed the snapshot is no longer needed.\n\tOpen(id string) (*SnapshotMeta, io.ReadCloser, error)\n}\n\n// SnapshotSink is returned by StartSnapshot. The FSM will Write state\n// to the sink and call Close on completion. On error, Cancel will be invoked.\ntype SnapshotSink interface {\n\tio.WriteCloser\n\tID() string\n\tCancel() error\n}\n\n// runSnapshots is a long running goroutine used to manage taking\n// new snapshots of the FSM. It runs in parallel to the FSM and\n// main goroutines, so that snapshots do not block normal operation.\nfunc (r *Raft) runSnapshots() {\n\tfor {\n\t\tselect {\n\t\tcase <-randomTimeout(r.config().SnapshotInterval):\n\t\t\t// Check if we should snapshot\n\t\t\tif !r.shouldSnapshot() {\n\t\t\t\tcontinue\n\t\t\t}\n\n\t\t\t// Trigger a snapshot\n\t\t\tif _, err := r.takeSnapshot(); err != nil {\n\t\t\t\tr.logger.Error(\"failed to take snapshot\", \"error\", err)\n\t\t\t}\n\n\t\tcase future := <-r.userSnapshotCh:\n\t\t\t// User-triggered, run immediately\n\t\t\tid, err := r.takeSnapshot()\n\t\t\tif err != nil {\n\t\t\t\tr.logger.Error(\"failed to take snapshot\", \"error\", err)\n\t\t\t} else {\n\t\t\t\tfuture.opener = func() (*SnapshotMeta, io.ReadCloser, error) {\n\t\t\t\t\treturn r.snapshots.Open(id)\n\t\t\t\t}\n\t\t\t}\n\t\t\tfuture.respond(err)\n\n\t\tcase <-r.shutdownCh:\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// shouldSnapshot checks if we meet the conditions to take\n// a new snapshot.\nfunc (r *Raft) shouldSnapshot() bool {\n\t// Check the last snapshot index\n\tlastSnap, _ := r.getLastSnapshot()\n\n\t// Check the last log index\n\tlastIdx, err := r.logs.LastIndex()\n\tif err != nil {\n\t\tr.logger.Error(\"failed to get last log index\", \"error\", err)\n\t\treturn false\n\t}\n\n\t// Compare the delta to the threshold\n\tdelta := lastIdx - lastSnap\n\treturn delta >= r.config().SnapshotThreshold\n}\n\n// takeSnapshot is used to take a new snapshot. 
This must only be called from\n// the snapshot thread, never the main thread. This returns the ID of the new\n// snapshot, along with an error.\nfunc (r *Raft) takeSnapshot() (string, error) {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"snapshot\", \"takeSnapshot\"}, time.Now())\n\n\t// Create a request for the FSM to perform a snapshot.\n\tsnapReq := &reqSnapshotFuture{}\n\tsnapReq.init()\n\n\t// Wait for dispatch or shutdown.\n\tselect {\n\tcase r.fsmSnapshotCh <- snapReq:\n\tcase <-r.shutdownCh:\n\t\treturn \"\", ErrRaftShutdown\n\t}\n\n\t// Wait until we get a response\n\tif err := snapReq.Error(); err != nil {\n\t\tif err != ErrNothingNewToSnapshot {\n\t\t\terr = fmt.Errorf(\"failed to start snapshot: %v\", err)\n\t\t}\n\t\treturn \"\", err\n\t}\n\tdefer snapReq.snapshot.Release()\n\n\t// Make a request for the configurations and extract the committed info.\n\t// We have to use the future here to safely get this information since\n\t// it is owned by the main thread.\n\tconfigReq := &configurationsFuture{}\n\tconfigReq.ShutdownCh = r.shutdownCh\n\tconfigReq.init()\n\tselect {\n\tcase r.configurationsCh <- configReq:\n\tcase <-r.shutdownCh:\n\t\treturn \"\", ErrRaftShutdown\n\t}\n\tif err := configReq.Error(); err != nil {\n\t\treturn \"\", err\n\t}\n\tcommitted := configReq.configurations.committed\n\tcommittedIndex := configReq.configurations.committedIndex\n\n\t// We don't support snapshots while there's a config change outstanding\n\t// since the snapshot doesn't have a means to represent this state. This\n\t// is a little weird because we need the FSM to apply an index that's\n\t// past the configuration change, even though the FSM itself doesn't see\n\t// the configuration changes. It should be ok in practice with normal\n\t// application traffic flowing through the FSM. 
If there's none of that\n\t// then it's not crucial that we snapshot, since there's not much going\n\t// on Raft-wise.\n\tif snapReq.index < committedIndex {\n\t\treturn \"\", fmt.Errorf(\"cannot take snapshot now, wait until the configuration entry at %v has been applied (have applied %v)\",\n\t\t\tcommittedIndex, snapReq.index)\n\t}\n\n\t// Create a new snapshot.\n\tr.logger.Info(\"starting snapshot up to\", \"index\", snapReq.index)\n\tstart := time.Now()\n\tversion := getSnapshotVersion(r.protocolVersion)\n\tsink, err := r.snapshots.Create(version, snapReq.index, snapReq.term, committed, committedIndex, r.trans)\n\tif err != nil {\n\t\treturn \"\", fmt.Errorf(\"failed to create snapshot: %v\", err)\n\t}\n\tmetrics.MeasureSince([]string{\"raft\", \"snapshot\", \"create\"}, start)\n\n\t// Try to persist the snapshot.\n\tstart = time.Now()\n\tif err := snapReq.snapshot.Persist(sink); err != nil {\n\t\t_ = sink.Cancel()\n\t\treturn \"\", fmt.Errorf(\"failed to persist snapshot: %v\", err)\n\t}\n\tmetrics.MeasureSince([]string{\"raft\", \"snapshot\", \"persist\"}, start)\n\n\t// Close and check for error.\n\tif err := sink.Close(); err != nil {\n\t\treturn \"\", fmt.Errorf(\"failed to close snapshot: %v\", err)\n\t}\n\n\t// Update the last stable snapshot info.\n\tr.setLastSnapshot(snapReq.index, snapReq.term)\n\n\t// Compact the logs.\n\tif err := r.compactLogs(snapReq.index); err != nil {\n\t\treturn \"\", err\n\t}\n\n\tr.logger.Info(\"snapshot complete up to\", \"index\", snapReq.index)\n\treturn sink.ID(), nil\n}\n\n// compactLogsWithTrailing takes the last inclusive index of a snapshot,\n// the lastLogIdx, and the trailingLogs and trims the logs that\n// are no longer needed.\nfunc (r *Raft) compactLogsWithTrailing(snapIdx uint64, lastLogIdx uint64, trailingLogs uint64) error {\n\t// Determine log ranges to compact\n\tminLog, err := r.logs.FirstIndex()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to get first log index: %v\", err)\n\t}\n\n\t// Check if we 
have enough logs to truncate\n\t// Use a consistent value for trailingLogs for the duration of this method\n\t// call to avoid surprising behaviour.\n\tif lastLogIdx <= trailingLogs {\n\t\treturn nil\n\t}\n\n\t// Truncate up to the end of the snapshot, or `TrailingLogs`\n\t// back from the head, whichever is further back. This ensures\n\t// at least `TrailingLogs` entries, but does not allow logs\n\t// after the snapshot to be removed.\n\tmaxLog := min(snapIdx, lastLogIdx-trailingLogs)\n\n\tif minLog > maxLog {\n\t\tr.logger.Info(\"no logs to truncate\")\n\t\treturn nil\n\t}\n\n\tr.logger.Info(\"compacting logs\", \"from\", minLog, \"to\", maxLog)\n\n\t// Compact the logs\n\tif err := r.logs.DeleteRange(minLog, maxLog); err != nil {\n\t\treturn fmt.Errorf(\"log compaction failed: %v\", err)\n\t}\n\treturn nil\n}\n\n// compactLogs takes the last inclusive index of a snapshot\n// and trims the logs that are no longer needed.\nfunc (r *Raft) compactLogs(snapIdx uint64) error {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"compactLogs\"}, time.Now())\n\n\tlastLogIdx, _ := r.getLastLog()\n\ttrailingLogs := r.config().TrailingLogs\n\n\treturn r.compactLogsWithTrailing(snapIdx, lastLogIdx, trailingLogs)\n}\n\n// removeOldLogs removes all old logs from the store. This is used for\n// MonotonicLogStores after restore. Callers should verify that the store\n// implementation is monotonic prior to calling.\nfunc (r *Raft) removeOldLogs() error {\n\tdefer metrics.MeasureSince([]string{\"raft\", \"removeOldLogs\"}, time.Now())\n\n\tlastLogIdx, err := r.logs.LastIndex()\n\tif err != nil {\n\t\treturn fmt.Errorf(\"failed to get last log index: %w\", err)\n\t}\n\n\tr.logger.Info(\"removing all old logs from log store\")\n\n\t// call compactLogsWithTrailing with lastLogIdx for snapIdx since\n\t// it will take the lesser of lastLogIdx and snapIdx to figure out\n\t// the end for which to apply trailingLogs.\n\treturn r.compactLogsWithTrailing(lastLogIdx, lastLogIdx, 0)\n}\n"
  },
  {
    "path": "stable.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\n// StableStore is used to provide stable storage\n// of key configurations to ensure safety.\ntype StableStore interface {\n\tSet(key []byte, val []byte) error\n\n\t// Get returns the value for key, or an empty byte slice if key was not found.\n\tGet(key []byte) ([]byte, error)\n\n\tSetUint64(key []byte, val uint64) error\n\n\t// GetUint64 returns the uint64 value for key, or 0 if key was not found.\n\tGetUint64(key []byte) (uint64, error)\n}\n"
  },
  {
    "path": "state.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"sync\"\n\t\"sync/atomic\"\n)\n\n// RaftState captures the state of a Raft node: Follower, Candidate, Leader,\n// or Shutdown.\ntype RaftState uint32\n\nconst (\n\t// Follower is the initial state of a Raft node.\n\tFollower RaftState = iota\n\n\t// Candidate is one of the valid states of a Raft node.\n\tCandidate\n\n\t// Leader is one of the valid states of a Raft node.\n\tLeader\n\n\t// Shutdown is the terminal state of a Raft node.\n\tShutdown\n)\n\nfunc (s RaftState) String() string {\n\tswitch s {\n\tcase Follower:\n\t\treturn \"Follower\"\n\tcase Candidate:\n\t\treturn \"Candidate\"\n\tcase Leader:\n\t\treturn \"Leader\"\n\tcase Shutdown:\n\t\treturn \"Shutdown\"\n\tdefault:\n\t\treturn \"Unknown\"\n\t}\n}\n\n// raftState is used to maintain various state variables\n// and provides an interface to set/get the variables in a\n// thread safe manner.\ntype raftState struct {\n\t// currentTerm commitIndex, lastApplied,  must be kept at the top of\n\t// the struct so they're 64 bit aligned which is a requirement for\n\t// atomic ops on 32 bit platforms.\n\n\t// The current term, cache of StableStore\n\tcurrentTerm uint64\n\n\t// Highest committed log entry\n\tcommitIndex uint64\n\n\t// Last applied log to the FSM\n\tlastApplied uint64\n\n\t// protects 4 next fields\n\tlastLock sync.Mutex\n\n\t// Cache the latest snapshot index/term\n\tlastSnapshotIndex uint64\n\tlastSnapshotTerm  uint64\n\n\t// Cache the latest log from LogStore\n\tlastLogIndex uint64\n\tlastLogTerm  uint64\n\n\t// Tracks running goroutines\n\troutinesGroup sync.WaitGroup\n\n\t// The current state\n\tstate RaftState\n}\n\nfunc (r *raftState) getState() RaftState {\n\tstateAddr := (*uint32)(&r.state)\n\treturn RaftState(atomic.LoadUint32(stateAddr))\n}\n\nfunc (r *raftState) setState(s RaftState) {\n\tstateAddr := (*uint32)(&r.state)\n\tatomic.StoreUint32(stateAddr, 
uint32(s))\n}\n\nfunc (r *raftState) getCurrentTerm() uint64 {\n\treturn atomic.LoadUint64(&r.currentTerm)\n}\n\nfunc (r *raftState) setCurrentTerm(term uint64) {\n\tatomic.StoreUint64(&r.currentTerm, term)\n}\n\nfunc (r *raftState) getLastLog() (index, term uint64) {\n\tr.lastLock.Lock()\n\tindex = r.lastLogIndex\n\tterm = r.lastLogTerm\n\tr.lastLock.Unlock()\n\treturn\n}\n\nfunc (r *raftState) setLastLog(index, term uint64) {\n\tr.lastLock.Lock()\n\tr.lastLogIndex = index\n\tr.lastLogTerm = term\n\tr.lastLock.Unlock()\n}\n\nfunc (r *raftState) getLastSnapshot() (index, term uint64) {\n\tr.lastLock.Lock()\n\tindex = r.lastSnapshotIndex\n\tterm = r.lastSnapshotTerm\n\tr.lastLock.Unlock()\n\treturn\n}\n\nfunc (r *raftState) setLastSnapshot(index, term uint64) {\n\tr.lastLock.Lock()\n\tr.lastSnapshotIndex = index\n\tr.lastSnapshotTerm = term\n\tr.lastLock.Unlock()\n}\n\nfunc (r *raftState) getCommitIndex() uint64 {\n\treturn atomic.LoadUint64(&r.commitIndex)\n}\n\nfunc (r *raftState) setCommitIndex(index uint64) {\n\tatomic.StoreUint64(&r.commitIndex, index)\n}\n\nfunc (r *raftState) getLastApplied() uint64 {\n\treturn atomic.LoadUint64(&r.lastApplied)\n}\n\nfunc (r *raftState) setLastApplied(index uint64) {\n\tatomic.StoreUint64(&r.lastApplied, index)\n}\n\n// Start a goroutine and properly handle the race between a routine\n// starting and incrementing, and exiting and decrementing.\nfunc (r *raftState) goFunc(f func()) {\n\tr.routinesGroup.Add(1)\n\tgo func() {\n\t\tdefer r.routinesGroup.Done()\n\t\tf()\n\t}()\n}\n\nfunc (r *raftState) waitShutdown() {\n\tr.routinesGroup.Wait()\n}\n\n// getLastIndex returns the last index in stable storage.\n// Either from the last log or from the last snapshot.\nfunc (r *raftState) getLastIndex() uint64 {\n\tr.lastLock.Lock()\n\tdefer r.lastLock.Unlock()\n\treturn max(r.lastLogIndex, r.lastSnapshotIndex)\n}\n\n// getLastEntry returns the last index and term in stable storage.\n// Either from the last log or from the last 
snapshot.\nfunc (r *raftState) getLastEntry() (uint64, uint64) {\n\tr.lastLock.Lock()\n\tdefer r.lastLock.Unlock()\n\tif r.lastLogIndex >= r.lastSnapshotIndex {\n\t\treturn r.lastLogIndex, r.lastLogTerm\n\t}\n\treturn r.lastSnapshotIndex, r.lastSnapshotTerm\n}\n"
  },
  {
    "path": "tag.sh",
    "content": "#!/usr/bin/env bash\n# Copyright IBM Corp. 2013, 2025\n# SPDX-License-Identifier: MPL-2.0\n\nset -e\n\n# The version must be supplied from the environment. Do not include the\n# leading \"v\".\nif [ -z $VERSION ]; then\n    echo \"Please specify a version.\"\n    exit 1\nfi\n\n# Generate the tag.\necho \"==> Tagging version $VERSION...\"\ngit commit --allow-empty -a --gpg-sign=348FFC4C -m \"Release v$VERSION\"\ngit tag -a -m \"Version $VERSION\" -s -u 348FFC4C \"v${VERSION}\" main\n\nexit 0\n"
  },
  {
    "path": "tcp_transport.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"errors\"\n\t\"io\"\n\t\"net\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n)\n\nvar (\n\terrNotAdvertisable = errors.New(\"local bind address is not advertisable\")\n\terrNotTCP          = errors.New(\"local address is not a TCP address\")\n)\n\n// TCPStreamLayer implements StreamLayer interface for plain TCP.\ntype TCPStreamLayer struct {\n\tadvertise net.Addr\n\tlistener  *net.TCPListener\n}\n\n// NewTCPTransport returns a NetworkTransport that is built on top of\n// a TCP streaming transport layer.\nfunc NewTCPTransport(\n\tbindAddr string,\n\tadvertise net.Addr,\n\tmaxPool int,\n\ttimeout time.Duration,\n\tlogOutput io.Writer,\n) (*NetworkTransport, error) {\n\treturn newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport {\n\t\treturn NewNetworkTransport(stream, maxPool, timeout, logOutput)\n\t})\n}\n\n// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of\n// a TCP streaming transport layer, with log output going to the supplied Logger\nfunc NewTCPTransportWithLogger(\n\tbindAddr string,\n\tadvertise net.Addr,\n\tmaxPool int,\n\ttimeout time.Duration,\n\tlogger hclog.Logger,\n) (*NetworkTransport, error) {\n\treturn newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport {\n\t\treturn NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)\n\t})\n}\n\n// NewTCPTransportWithConfig returns a NetworkTransport that is built on top of\n// a TCP streaming transport layer, using the given config struct.\nfunc NewTCPTransportWithConfig(\n\tbindAddr string,\n\tadvertise net.Addr,\n\tconfig *NetworkTransportConfig,\n) (*NetworkTransport, error) {\n\treturn newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport {\n\t\tconfig.Stream = stream\n\t\treturn NewNetworkTransportWithConfig(config)\n\t})\n}\n\nfunc newTCPTransport(bindAddr 
string,\n\tadvertise net.Addr,\n\ttransportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {\n\t// Try to bind\n\tlist, err := net.Listen(\"tcp\", bindAddr)\n\tif err != nil {\n\t\treturn nil, err\n\t}\n\n\t// Create stream\n\tstream := &TCPStreamLayer{\n\t\tadvertise: advertise,\n\t\tlistener:  list.(*net.TCPListener),\n\t}\n\n\t// Verify that we have a usable advertise address\n\taddr, ok := stream.Addr().(*net.TCPAddr)\n\tif !ok {\n\t\t_ = list.Close()\n\t\treturn nil, errNotTCP\n\t}\n\tif addr.IP == nil || addr.IP.IsUnspecified() {\n\t\t_ = list.Close()\n\t\treturn nil, errNotAdvertisable\n\t}\n\n\t// Create the network transport\n\ttrans := transportCreator(stream)\n\treturn trans, nil\n}\n\n// Dial implements the StreamLayer interface.\nfunc (t *TCPStreamLayer) Dial(address ServerAddress, timeout time.Duration) (net.Conn, error) {\n\treturn net.DialTimeout(\"tcp\", string(address), timeout)\n}\n\n// Accept implements the net.Listener interface.\nfunc (t *TCPStreamLayer) Accept() (c net.Conn, err error) {\n\treturn t.listener.Accept()\n}\n\n// Close implements the net.Listener interface.\nfunc (t *TCPStreamLayer) Close() (err error) {\n\treturn t.listener.Close()\n}\n\n// Addr implements the net.Listener interface.\nfunc (t *TCPStreamLayer) Addr() net.Addr {\n\t// Use an advertise addr if provided\n\tif t.advertise != nil {\n\t\treturn t.advertise\n\t}\n\treturn t.listener.Addr()\n}\n"
  },
  {
    "path": "tcp_transport_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"net\"\n\t\"testing\"\n)\n\nfunc TestTCPTransport_BadAddr(t *testing.T) {\n\t_, err := NewTCPTransportWithLogger(\"0.0.0.0:0\", nil, 1, 0, newTestLogger(t))\n\tif err != errNotAdvertisable {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n}\n\nfunc TestTCPTransport_EmptyAddr(t *testing.T) {\n\t_, err := NewTCPTransportWithLogger(\":0\", nil, 1, 0, newTestLogger(t))\n\tif err != errNotAdvertisable {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n}\n\nfunc TestTCPTransport_WithAdvertise(t *testing.T) {\n\tips, err := net.LookupIP(\"localhost\")\n\tif err != nil {\n\t\tt.Fatal(err)\n\t}\n\tif len(ips) == 0 {\n\t\tt.Fatalf(\"localhost did not resolve to any IPs\")\n\t}\n\taddr := &net.TCPAddr{IP: ips[0], Port: 12345}\n\ttrans, err := NewTCPTransportWithLogger(\"0.0.0.0:0\", addr, 1, 0, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tif trans.LocalAddr() != ServerAddress(net.JoinHostPort(ips[0].String(), \"12345\")) {\n\t\tt.Fatalf(\"bad: %v\", trans.LocalAddr())\n\t}\n}\n"
  },
  {
    "path": "testing.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"context\"\n\t\"fmt\"\n\t\"io\"\n\t\"os\"\n\t\"reflect\"\n\t\"sync\"\n\t\"testing\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-hclog\"\n\t\"github.com/hashicorp/go-msgpack/v2/codec\"\n)\n\nvar userSnapshotErrorsOnNoData = true\n\n// Return configurations optimized for in-memory\nfunc inmemConfig(t testing.TB) *Config {\n\tconf := DefaultConfig()\n\tconf.HeartbeatTimeout = 50 * time.Millisecond\n\tconf.ElectionTimeout = 50 * time.Millisecond\n\tconf.LeaderLeaseTimeout = 50 * time.Millisecond\n\tconf.CommitTimeout = 5 * time.Millisecond\n\tconf.Logger = newTestLogger(t)\n\treturn conf\n}\n\n// MockFSM is an implementation of the FSM interface, and just stores\n// the logs sequentially.\n//\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\ntype MockFSM struct {\n\tsync.Mutex\n\tlogs           [][]byte\n\tconfigurations []Configuration\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\ntype MockFSMConfigStore struct {\n\tFSM\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\ntype WrappingFSM interface {\n\tUnderlying() FSM\n}\n\nfunc getMockFSM(fsm FSM) *MockFSM {\n\tswitch f := fsm.(type) {\n\tcase *MockFSM:\n\t\treturn f\n\tcase *MockFSMConfigStore:\n\t\treturn f.FSM.(*MockFSM)\n\tcase WrappingFSM:\n\t\treturn getMockFSM(f.Underlying())\n\t}\n\n\treturn nil\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\ntype MockSnapshot struct {\n\tlogs     [][]byte\n\tmaxIndex int\n}\n\nvar _ ConfigurationStore = (*MockFSMConfigStore)(nil)\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc (m *MockFSM) Apply(log *Log) interface{} {\n\tm.Lock()\n\tdefer m.Unlock()\n\tm.logs = append(m.logs, log.Data)\n\treturn len(m.logs)\n}\n\n// NOTE: This is exposed for middleware 
testing purposes and is not a stable API\nfunc (m *MockFSM) Snapshot() (FSMSnapshot, error) {\n\tm.Lock()\n\tdefer m.Unlock()\n\treturn &MockSnapshot{m.logs, len(m.logs)}, nil\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc (m *MockFSM) Restore(inp io.ReadCloser) error {\n\tm.Lock()\n\tdefer m.Unlock()\n\tdefer func() { _ = inp.Close() }()\n\thd := codec.MsgpackHandle{}\n\tdec := codec.NewDecoder(inp, &hd)\n\n\tm.logs = nil\n\treturn dec.Decode(&m.logs)\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc (m *MockFSM) Logs() [][]byte {\n\tm.Lock()\n\tdefer m.Unlock()\n\treturn m.logs\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc (m *MockFSMConfigStore) StoreConfiguration(index uint64, config Configuration) {\n\tmm := m.FSM.(*MockFSM)\n\tmm.Lock()\n\tdefer mm.Unlock()\n\tmm.configurations = append(mm.configurations, config)\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc (m *MockSnapshot) Persist(sink SnapshotSink) error {\n\thd := codec.MsgpackHandle{}\n\tenc := codec.NewEncoder(sink, &hd)\n\tif err := enc.Encode(m.logs[:m.maxIndex]); err != nil {\n\t\t_ = sink.Cancel()\n\t\treturn err\n\t}\n\t_ = sink.Close()\n\treturn nil\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc (m *MockSnapshot) Release() {\n}\n\n// MockMonotonicLogStore is a LogStore wrapper for testing the\n// MonotonicLogStore interface.\ntype MockMonotonicLogStore struct {\n\ts LogStore\n}\n\n// IsMonotonic implements the MonotonicLogStore interface.\nfunc (m *MockMonotonicLogStore) IsMonotonic() bool {\n\treturn true\n}\n\n// FirstIndex implements the LogStore interface.\nfunc (m *MockMonotonicLogStore) FirstIndex() (uint64, error) {\n\treturn m.s.FirstIndex()\n}\n\n// LastIndex implements the LogStore interface.\nfunc (m *MockMonotonicLogStore) LastIndex() (uint64, error) 
{\n\treturn m.s.LastIndex()\n}\n\n// GetLog implements the LogStore interface.\nfunc (m *MockMonotonicLogStore) GetLog(index uint64, log *Log) error {\n\treturn m.s.GetLog(index, log)\n}\n\n// StoreLog implements the LogStore interface.\nfunc (m *MockMonotonicLogStore) StoreLog(log *Log) error {\n\treturn m.s.StoreLog(log)\n}\n\n// StoreLogs implements the LogStore interface.\nfunc (m *MockMonotonicLogStore) StoreLogs(logs []*Log) error {\n\treturn m.s.StoreLogs(logs)\n}\n\n// DeleteRange implements the LogStore interface.\nfunc (m *MockMonotonicLogStore) DeleteRange(min uint64, max uint64) error {\n\treturn m.s.DeleteRange(min, max)\n}\n\n// This can be used as the destination for a logger and it'll\n// map them into calls to testing.T.Log, so that you only see\n// the logging for failed tests.\ntype testLoggerAdapter struct {\n\ttb     testing.TB\n\tprefix string\n}\n\nfunc (a *testLoggerAdapter) Write(d []byte) (int, error) {\n\tif d[len(d)-1] == '\\n' {\n\t\td = d[:len(d)-1]\n\t}\n\tif a.prefix != \"\" {\n\t\tl := a.prefix + \": \" + string(d)\n\t\ta.tb.Log(l)\n\t\treturn len(l), nil\n\t}\n\n\ta.tb.Log(string(d))\n\treturn len(d), nil\n}\n\nfunc newTestLogger(tb testing.TB) hclog.Logger {\n\treturn newTestLoggerWithPrefix(tb, \"\")\n}\n\n// newTestLoggerWithPrefix returns a Logger that can be used in tests. prefix\n// will be added as the name of the logger.\n//\n// If tests are run with -v (verbose mode, or -json which implies verbose) the\n// log output will go to stderr directly. If tests are run in regular \"quiet\"\n// mode, logs will be sent to t.Log so that the logs only appear when a test\n// fails.\n//\n// Be careful where this is used though - calling t.Log after the test completes\n// causes a panic. 
This is common if you use it for a NetworkTransport for\n// example and then close the transport at the end of the test because an error\n// is logged after the test is complete.\nfunc newTestLoggerWithPrefix(tb testing.TB, prefix string) hclog.Logger {\n\tif testing.Verbose() {\n\t\treturn hclog.New(&hclog.LoggerOptions{Name: prefix, Level: hclog.Trace})\n\t}\n\n\treturn hclog.New(&hclog.LoggerOptions{\n\t\tName:   prefix,\n\t\tOutput: &testLoggerAdapter{tb: tb, prefix: prefix},\n\t})\n}\n\ntype cluster struct {\n\tdirs             []string\n\tstores           []*InmemStore\n\tfsms             []FSM\n\tsnaps            []*FileSnapshotStore\n\ttrans            []LoopbackTransport\n\trafts            []*Raft\n\tt                *testing.T\n\tobservationCh    chan Observation\n\tconf             *Config\n\tpropagateTimeout time.Duration\n\tlongstopTimeout  time.Duration\n\tlogger           hclog.Logger\n\tstartTime        time.Time\n\n\tfailedLock sync.Mutex\n\tfailedCh   chan struct{}\n\tfailed     bool\n}\n\nfunc (c *cluster) Merge(other *cluster) {\n\tc.dirs = append(c.dirs, other.dirs...)\n\tc.stores = append(c.stores, other.stores...)\n\tc.fsms = append(c.fsms, other.fsms...)\n\tc.snaps = append(c.snaps, other.snaps...)\n\tc.trans = append(c.trans, other.trans...)\n\tc.rafts = append(c.rafts, other.rafts...)\n}\n\nfunc (c *cluster) RemoveServer(id ServerID) {\n\tfor i, n := range c.rafts {\n\t\tif n.localID == id {\n\t\t\tc.rafts = append(c.rafts[:i], c.rafts[i+1:]...)\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// notifyFailed will close the failed channel which can signal the goroutine\n// running the test that another goroutine has detected a failure in order to\n// terminate the test.\nfunc (c *cluster) notifyFailed() {\n\tc.failedLock.Lock()\n\tdefer c.failedLock.Unlock()\n\tif !c.failed {\n\t\tc.failed = true\n\t\tclose(c.failedCh)\n\t}\n}\n\n// Failf provides a logging function that fails the tests, prints the output\n// with microseconds, and does not mysteriously 
eat the string. This can be\n// safely called from goroutines but won't immediately halt the test. The\n// failedCh will be closed to allow blocking functions in the main thread to\n// detect the failure and react. Note that you should arrange for the main\n// thread to block until all goroutines have completed in order to reliably\n// fail tests using this function.\nfunc (c *cluster) Failf(format string, args ...interface{}) {\n\tc.logger.Error(fmt.Sprintf(format, args...))\n\tc.t.Fail()\n\tc.notifyFailed()\n}\n\n// FailNowf provides a logging function that fails the tests, prints the output\n// with microseconds, and does not mysteriously eat the string. FailNowf must be\n// called from the goroutine running the test or benchmark function, not from\n// other goroutines created during the test. Calling FailNowf does not stop\n// those other goroutines.\nfunc (c *cluster) FailNowf(format string, args ...interface{}) {\n\tc.t.Helper()\n\tc.t.Fatalf(format, args...)\n}\n\n// Close shuts down the cluster and cleans up.\nfunc (c *cluster) Close() {\n\tvar futures []Future\n\tfor _, r := range c.rafts {\n\t\tfutures = append(futures, r.Shutdown())\n\t}\n\n\t// Wait for shutdown\n\tlimit := time.AfterFunc(c.longstopTimeout, func() {\n\t\t// We can't FailNowf here, and c.Failf won't do anything if we\n\t\t// hang, so panic.\n\t\tpanic(\"timed out waiting for shutdown\")\n\t})\n\tdefer limit.Stop()\n\n\tfor _, f := range futures {\n\t\tif err := f.Error(); err != nil {\n\t\t\tc.t.Fatalf(\"shutdown future err: %v\", err)\n\t\t}\n\t}\n\n\tfor _, d := range c.dirs {\n\t\t_ = os.RemoveAll(d)\n\t}\n}\n\n// WaitEventChan returns a channel which will signal if an observation is made\n// or a timeout occurs. It is possible to set a filter to look for specific\n// observations. 
Setting timeout to 0 means that it will wait forever until a\n// non-filtered observation is made.\nfunc (c *cluster) WaitEventChan(ctx context.Context, filter FilterFn) <-chan struct{} {\n\tch := make(chan struct{})\n\tgo func() {\n\t\tdefer close(ch)\n\t\tfor {\n\t\t\tselect {\n\t\t\tcase <-ctx.Done():\n\t\t\t\treturn\n\t\t\tcase o, ok := <-c.observationCh:\n\t\t\t\tif !ok || filter == nil || filter(&o) {\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}()\n\treturn ch\n}\n\n// WaitEvent waits until an observation is made, a timeout occurs, or a test\n// failure is signaled. It is possible to set a filter to look for specific\n// observations. Setting timeout to 0 means that it will wait forever until a\n// non-filtered observation is made or a test failure is signaled.\nfunc (c *cluster) WaitEvent(filter FilterFn, timeout time.Duration) {\n\tctx, cancel := context.WithTimeout(context.Background(), timeout)\n\tdefer cancel()\n\teventCh := c.WaitEventChan(ctx, filter)\n\tselect {\n\tcase <-c.failedCh:\n\t\tc.t.FailNow()\n\tcase <-eventCh:\n\t}\n}\n\n// WaitForReplication blocks until every FSM in the cluster has the given\n// length, or the long sanity check timeout expires.\nfunc (c *cluster) WaitForReplication(fsmLength int) {\n\tlimitCh := time.After(c.longstopTimeout)\n\nCHECK:\n\tfor {\n\t\tctx, cancel := context.WithTimeout(context.Background(), c.conf.CommitTimeout)\n\t\tdefer cancel()\n\t\tch := c.WaitEventChan(ctx, nil)\n\t\tselect {\n\t\tcase <-c.failedCh:\n\t\t\tc.t.FailNow()\n\n\t\tcase <-limitCh:\n\t\t\tc.t.Fatalf(\"timeout waiting for replication\")\n\n\t\tcase <-ch:\n\t\t\tfor _, fsmRaw := range c.fsms {\n\t\t\t\tfsm := getMockFSM(fsmRaw)\n\t\t\t\tfsm.Lock()\n\t\t\t\tnum := len(fsm.logs)\n\t\t\t\tfsm.Unlock()\n\t\t\t\tif num != fsmLength {\n\t\t\t\t\tcontinue CHECK\n\t\t\t\t}\n\t\t\t}\n\t\t\treturn\n\t\t}\n\t}\n}\n\n// pollState takes a snapshot of the state of the cluster. 
This might not be\n// stable, so use GetInState() to apply some additional checks when waiting\n// for the cluster to achieve a particular state.\nfunc (c *cluster) pollState(s RaftState) ([]*Raft, uint64) {\n\tvar highestTerm uint64\n\tin := make([]*Raft, 0, 1)\n\tfor _, r := range c.rafts {\n\t\tif r.State() == s {\n\t\t\tin = append(in, r)\n\t\t}\n\t\tterm := r.getCurrentTerm()\n\t\tif term > highestTerm {\n\t\t\thighestTerm = term\n\t\t}\n\t}\n\treturn in, highestTerm\n}\n\n// GetInState polls the state of the cluster and attempts to identify when it has\n// settled into the given state.\nfunc (c *cluster) GetInState(s RaftState) []*Raft {\n\tc.logger.Info(\"starting stability test\", \"raft-state\", s)\n\tlimitCh := time.After(c.longstopTimeout)\n\n\t// An election should complete after 2 * max(HeartbeatTimeout, ElectionTimeout)\n\t// because of the randomised timer expiring in 1 x interval ... 2 x interval.\n\t// We add a bit for propagation delay. If the election fails (e.g. because\n\t// two elections start at once), we will have got something through our\n\t// observer channel indicating a different state (i.e. one of the nodes\n\t// will have moved to candidate state) which will reset the timer.\n\t//\n\t// Because of an implementation peculiarity, it can actually be 3 x timeout.\n\ttimeout := c.conf.HeartbeatTimeout\n\tif timeout < c.conf.ElectionTimeout {\n\t\ttimeout = c.conf.ElectionTimeout\n\t}\n\ttimeout = 2*timeout + c.conf.CommitTimeout\n\ttimer := time.NewTimer(timeout)\n\tdefer timer.Stop()\n\n\t// Wait until we have a stable instate slice. Each time we see an\n\t// observation a state has changed, recheck it and if it has changed,\n\t// restart the timer.\n\tpollStartTime := time.Now()\n\tfor {\n\t\t_, highestTerm := c.pollState(s)\n\t\tinStateTime := time.Now()\n\n\t\t// Sometimes this routine is called very early on before the\n\t\t// rafts have started up. We then timeout even though no one has\n\t\t// even started an election. 
So if the highest term in use is\n\t\t// zero, we know there are no raft processes that have yet issued\n\t\t// a RequestVote, and we set a long time out. This is fixed when\n\t\t// we hear the first RequestVote, at which point we reset the\n\t\t// timer.\n\t\tif highestTerm == 0 {\n\t\t\ttimer.Reset(c.longstopTimeout)\n\t\t} else {\n\t\t\ttimer.Reset(timeout)\n\t\t}\n\n\t\t// Filter will wake up whenever we observe a RequestVote.\n\t\tfilter := func(ob *Observation) bool {\n\t\t\tswitch ob.Data.(type) {\n\t\t\tcase RaftState:\n\t\t\t\treturn true\n\t\t\tcase RequestVoteRequest:\n\t\t\t\treturn true\n\t\t\tdefault:\n\t\t\t\treturn false\n\t\t\t}\n\t\t}\n\n\t\tctx, cancel := context.WithCancel(context.Background())\n\t\tdefer cancel()\n\t\teventCh := c.WaitEventChan(ctx, filter)\n\t\tselect {\n\t\tcase <-c.failedCh:\n\t\t\tc.t.FailNow()\n\n\t\tcase <-limitCh:\n\t\t\tc.t.Fatalf(\"timeout waiting for stable %s state\", s)\n\n\t\tcase <-eventCh:\n\t\t\tc.logger.Debug(\"resetting stability timeout\")\n\n\t\tcase t, ok := <-timer.C:\n\t\t\tif !ok {\n\t\t\t\tc.t.Fatalf(\"timer channel errored\")\n\t\t\t}\n\n\t\t\tinState, highestTerm := c.pollState(s)\n\t\t\tc.logger.Info(fmt.Sprintf(\"stable state for %s reached at %s (%d nodes), highestTerm is %d, %s from start of poll, %s from cluster start. 
Timeout at %s, %s after stability\",\n\t\t\t\ts, inStateTime, len(inState), highestTerm, inStateTime.Sub(pollStartTime), inStateTime.Sub(c.startTime), t, t.Sub(inStateTime)))\n\t\t\treturn inState\n\t\t}\n\t}\n}\n\n// Leader waits for the cluster to elect a leader and stay in a stable state.\nfunc (c *cluster) Leader() *Raft {\n\tc.t.Helper()\n\tleaders := c.GetInState(Leader)\n\tif len(leaders) != 1 {\n\t\tc.t.Fatalf(\"expected one leader: %v\", leaders)\n\t}\n\treturn leaders[0]\n}\n\n// Followers waits for the cluster to have N-1 followers and stay in a stable\n// state.\nfunc (c *cluster) Followers() []*Raft {\n\texpFollowers := len(c.rafts) - 1\n\treturn c.WaitForFollowers(expFollowers)\n}\n\n// WaitForFollowers waits for the cluster to have a given number of followers and stay in a stable\n// state.\nfunc (c *cluster) WaitForFollowers(expFollowers int) []*Raft {\n\tfollowers := c.GetInState(Follower)\n\tif len(followers) != expFollowers {\n\t\tc.t.Fatalf(\"timeout waiting for %d followers (followers are %v)\", expFollowers, followers)\n\t}\n\treturn followers\n}\n\n// FullyConnect connects all the transports together.\nfunc (c *cluster) FullyConnect() {\n\tc.logger.Debug(\"fully connecting\")\n\tfor i, t1 := range c.trans {\n\t\tfor j, t2 := range c.trans {\n\t\t\tif i != j {\n\t\t\t\tt1.Connect(t2.LocalAddr(), t2)\n\t\t\t\tt2.Connect(t1.LocalAddr(), t1)\n\t\t\t}\n\t\t}\n\t}\n}\n\n// Disconnect disconnects all transports from the given address.\nfunc (c *cluster) Disconnect(a ServerAddress) {\n\tc.logger.Debug(\"disconnecting\", \"address\", a)\n\tfor _, t := range c.trans {\n\t\tif t.LocalAddr() == a {\n\t\t\tt.DisconnectAll()\n\t\t} else {\n\t\t\tt.Disconnect(a)\n\t\t}\n\t}\n}\n\n// Partition keeps the given list of addresses connected but isolates them\n// from the other members of the cluster.\nfunc (c *cluster) Partition(far []ServerAddress) {\n\tc.logger.Debug(\"partitioning\", \"addresses\", far)\n\n\t// Gather the set of nodes on the \"near\" side of 
the partition (we\n\t// will call the supplied list of nodes the \"far\" side).\n\tnear := make(map[ServerAddress]struct{})\nOUTER:\n\tfor _, t := range c.trans {\n\t\tl := t.LocalAddr()\n\t\tfor _, a := range far {\n\t\t\tif l == a {\n\t\t\t\tcontinue OUTER\n\t\t\t}\n\t\t}\n\t\tnear[l] = struct{}{}\n\t}\n\n\t// Now fixup all the connections. The near side will be separated from\n\t// the far side, and vice-versa.\n\tfor _, t := range c.trans {\n\t\tl := t.LocalAddr()\n\t\tif _, ok := near[l]; ok {\n\t\t\tfor _, a := range far {\n\t\t\t\tt.Disconnect(a)\n\t\t\t}\n\t\t} else {\n\t\t\tfor a := range near {\n\t\t\t\tt.Disconnect(a)\n\t\t\t}\n\t\t}\n\t}\n}\n\n// IndexOf returns the index of the given raft instance.\nfunc (c *cluster) IndexOf(r *Raft) int {\n\tfor i, n := range c.rafts {\n\t\tif n == r {\n\t\t\treturn i\n\t\t}\n\t}\n\treturn -1\n}\n\n// EnsureLeader checks that ALL the nodes think the leader is the given expected\n// leader.\nfunc (c *cluster) EnsureLeader(t *testing.T, expect ServerAddress) {\n\t// We assume c.Leader() has been called already; now check all the rafts\n\t// think the leader is correct\n\tfail := false\n\tfor _, r := range c.rafts {\n\t\tleaderAddr, _ := r.LeaderWithID()\n\n\t\tif leaderAddr != expect {\n\t\t\tif leaderAddr == \"\" {\n\t\t\t\tleaderAddr = \"[none]\"\n\t\t\t}\n\t\t\tif expect == \"\" {\n\t\t\t\tc.logger.Error(\"peer sees incorrect leader\", \"peer\", r, \"leader\", leaderAddr, \"expected-leader\", \"[none]\")\n\t\t\t} else {\n\t\t\t\tc.logger.Error(\"peer sees incorrect leader\", \"peer\", r, \"leader\", leaderAddr, \"expected-leader\", expect)\n\t\t\t}\n\t\t\tfail = true\n\t\t}\n\t}\n\tif fail {\n\t\tt.Fatalf(\"at least one peer has the wrong notion of leader\")\n\t}\n}\n\n// EnsureSame makes sure all the FSMs have the same contents.\nfunc (c *cluster) EnsureSame(t *testing.T) {\n\tlimit := time.Now().Add(c.longstopTimeout)\n\tfirst := getMockFSM(c.fsms[0])\n\nCHECK:\n\tfirst.Lock()\n\tfor i, fsmRaw := range c.fsms 
{\n\t\tfsm := getMockFSM(fsmRaw)\n\t\tif i == 0 {\n\t\t\tcontinue\n\t\t}\n\t\tfsm.Lock()\n\n\t\tif len(first.logs) != len(fsm.logs) {\n\t\t\tfsm.Unlock()\n\t\t\tif time.Now().After(limit) {\n\t\t\t\tt.Fatalf(\"FSM log length mismatch: %d %d\",\n\t\t\t\t\tlen(first.logs), len(fsm.logs))\n\t\t\t} else {\n\t\t\t\tgoto WAIT\n\t\t\t}\n\t\t}\n\n\t\tfor idx := 0; idx < len(first.logs); idx++ {\n\t\t\tif !bytes.Equal(first.logs[idx], fsm.logs[idx]) {\n\t\t\t\tfsm.Unlock()\n\t\t\t\tif time.Now().After(limit) {\n\t\t\t\t\tt.Fatalf(\"FSM log mismatch at index %d\", idx)\n\t\t\t\t} else {\n\t\t\t\t\tgoto WAIT\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\tif len(first.configurations) != len(fsm.configurations) {\n\t\t\tfsm.Unlock()\n\t\t\tif time.Now().After(limit) {\n\t\t\t\tt.Fatalf(\"FSM configuration length mismatch: %d %d\",\n\t\t\t\t\tlen(first.logs), len(fsm.logs))\n\t\t\t} else {\n\t\t\t\tgoto WAIT\n\t\t\t}\n\t\t}\n\n\t\tfor idx := 0; idx < len(first.configurations); idx++ {\n\t\t\tif !reflect.DeepEqual(first.configurations[idx], fsm.configurations[idx]) {\n\t\t\t\tfsm.Unlock()\n\t\t\t\tif time.Now().After(limit) {\n\t\t\t\t\tt.Fatalf(\"FSM configuration mismatch at index %d: %v, %v\", idx, first.configurations[idx], fsm.configurations[idx])\n\t\t\t\t} else {\n\t\t\t\t\tgoto WAIT\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t\tfsm.Unlock()\n\t}\n\n\tfirst.Unlock()\n\treturn\n\nWAIT:\n\tfirst.Unlock()\n\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\tgoto CHECK\n}\n\n// getConfiguration returns the configuration of the given Raft instance, or\n// fails the test if there's an error\nfunc (c *cluster) getConfiguration(r *Raft) Configuration {\n\tfuture := r.GetConfiguration()\n\tif err := future.Error(); err != nil {\n\t\tc.t.Fatalf(\"failed to get configuration: %v\", err)\n\t\treturn Configuration{}\n\t}\n\n\treturn future.Configuration()\n}\n\n// EnsureSamePeers makes sure all the rafts have the same set of peers.\nfunc (c *cluster) EnsureSamePeers(t *testing.T) {\n\tlimit := 
time.Now().Add(c.longstopTimeout)\n\tpeerSet := c.getConfiguration(c.rafts[0])\n\nCHECK:\n\tfor i, raft := range c.rafts {\n\t\tif i == 0 {\n\t\t\tcontinue\n\t\t}\n\n\t\totherSet := c.getConfiguration(raft)\n\t\tif !reflect.DeepEqual(peerSet, otherSet) {\n\t\t\tif time.Now().After(limit) {\n\t\t\t\tt.Fatalf(\"peer mismatch: %+v %+v\", peerSet, otherSet)\n\t\t\t} else {\n\t\t\t\tgoto WAIT\n\t\t\t}\n\t\t}\n\t}\n\treturn\n\nWAIT:\n\tc.WaitEvent(nil, c.conf.CommitTimeout)\n\tgoto CHECK\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\ntype MakeClusterOpts struct {\n\tPeers           int\n\tBootstrap       bool\n\tConf            *Config\n\tConfigStoreFSM  bool\n\tMakeFSMFunc     func() FSM\n\tLongstopTimeout time.Duration\n\tMonotonicLogs   bool\n}\n\n// makeCluster will return a cluster with the given config and number of peers.\n// If bootstrap is true, the servers will know about each other before starting,\n// otherwise their transports will be wired up but they won't yet have configured\n// each other.\nfunc makeCluster(t *testing.T, opts *MakeClusterOpts) *cluster {\n\tif opts.Conf == nil {\n\t\topts.Conf = inmemConfig(t)\n\t}\n\n\tc := &cluster{\n\t\tobservationCh: make(chan Observation, 1024),\n\t\tconf:          opts.Conf,\n\t\t// Propagation takes a maximum of 2 heartbeat timeouts (time to\n\t\t// get a new heartbeat that would cause a commit) plus a bit.\n\t\tpropagateTimeout: opts.Conf.HeartbeatTimeout*2 + opts.Conf.CommitTimeout,\n\t\tlongstopTimeout:  5 * time.Second,\n\t\tlogger:           newTestLoggerWithPrefix(t, \"cluster\"),\n\t\tfailedCh:         make(chan struct{}),\n\t}\n\tif opts.LongstopTimeout > 0 {\n\t\tc.longstopTimeout = opts.LongstopTimeout\n\t}\n\n\tc.t = t\n\tvar configuration Configuration\n\n\t// Setup the stores and transports\n\tfor i := 0; i < opts.Peers; i++ {\n\t\tdir, err := os.MkdirTemp(\"\", \"raft\")\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\tstore := 
NewInmemStore()\n\t\tc.dirs = append(c.dirs, dir)\n\t\tc.stores = append(c.stores, store)\n\t\tif opts.ConfigStoreFSM {\n\t\t\tc.fsms = append(c.fsms, &MockFSMConfigStore{\n\t\t\t\tFSM: &MockFSM{},\n\t\t\t})\n\t\t} else {\n\t\t\tvar fsm FSM\n\t\t\tif opts.MakeFSMFunc != nil {\n\t\t\t\tfsm = opts.MakeFSMFunc()\n\t\t\t} else {\n\t\t\t\tfsm = &MockFSM{}\n\t\t\t}\n\t\t\tc.fsms = append(c.fsms, fsm)\n\t\t}\n\n\t\tdir2, snap := FileSnapTest(t)\n\t\tc.dirs = append(c.dirs, dir2)\n\t\tc.snaps = append(c.snaps, snap)\n\n\t\taddr, trans := NewInmemTransport(\"\")\n\t\tc.trans = append(c.trans, trans)\n\t\tlocalID := ServerID(fmt.Sprintf(\"server-%s\", addr))\n\t\tif opts.Conf.ProtocolVersion < 3 {\n\t\t\tlocalID = ServerID(addr)\n\t\t}\n\t\tconfiguration.Servers = append(configuration.Servers, Server{\n\t\t\tSuffrage: Voter,\n\t\t\tID:       localID,\n\t\t\tAddress:  addr,\n\t\t})\n\t}\n\n\t// Wire the transports together\n\tc.FullyConnect()\n\n\t// Create all the rafts\n\tc.startTime = time.Now()\n\tfor i := 0; i < opts.Peers; i++ {\n\t\tvar logs LogStore\n\t\tlogs = c.stores[i]\n\t\tstore := c.stores[i]\n\t\tsnap := c.snaps[i]\n\t\ttrans := c.trans[i]\n\n\t\tif opts.MonotonicLogs {\n\t\t\tlogs = &MockMonotonicLogStore{s: logs}\n\t\t}\n\n\t\tpeerConf := opts.Conf\n\t\tpeerConf.LocalID = configuration.Servers[i].ID\n\t\tpeerConf.Logger = newTestLoggerWithPrefix(t, string(configuration.Servers[i].ID))\n\n\t\tif opts.Bootstrap {\n\t\t\terr := BootstrapCluster(peerConf, logs, store, snap, trans, configuration)\n\t\t\tif err != nil {\n\t\t\t\tt.Fatalf(\"BootstrapCluster failed: %v\", err)\n\t\t\t}\n\t\t}\n\n\t\traft, err := NewRaft(peerConf, c.fsms[i], logs, store, snap, trans)\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"NewRaft failed: %v\", err)\n\t\t}\n\n\t\traft.RegisterObserver(NewObserver(c.observationCh, false, nil))\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"RegisterObserver failed: %v\", err)\n\t\t}\n\t\tc.rafts = append(c.rafts, raft)\n\t}\n\n\treturn c\n}\n\n// NOTE: This is 
exposed for middleware testing purposes and is not a stable API\nfunc MakeCluster(n int, t *testing.T, conf *Config) *cluster {\n\treturn makeCluster(t, &MakeClusterOpts{\n\t\tPeers:     n,\n\t\tBootstrap: true,\n\t\tConf:      conf,\n\t})\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc MakeClusterNoBootstrap(n int, t *testing.T, conf *Config) *cluster {\n\treturn makeCluster(t, &MakeClusterOpts{\n\t\tPeers: n,\n\t\tConf:  conf,\n\t})\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc MakeClusterCustom(t *testing.T, opts *MakeClusterOpts) *cluster {\n\treturn makeCluster(t, opts)\n}\n\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc FileSnapTest(t *testing.T) (string, *FileSnapshotStore) {\n\t// Create a test dir\n\tdir, err := os.MkdirTemp(\"\", \"raft\")\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v \", err)\n\t}\n\n\tsnap, err := NewFileSnapshotStoreWithLogger(dir, 3, newTestLogger(t))\n\tif err != nil {\n\t\tt.Fatalf(\"err: %v\", err)\n\t}\n\tsnap.noSync = true\n\treturn dir, snap\n}\n"
  },
  {
    "path": "testing_batch.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\n//go:build batchtest\n\npackage raft\n\nfunc init() {\n\tuserSnapshotErrorsOnNoData = false\n}\n\n// ApplyBatch enables MockFSM to satisfy the BatchingFSM interface. This\n// function is gated by the batchtest build flag.\n//\n// NOTE: This is exposed for middleware testing purposes and is not a stable API\nfunc (m *MockFSM) ApplyBatch(logs []*Log) []interface{} {\n\tm.Lock()\n\tdefer m.Unlock()\n\n\tret := make([]interface{}, len(logs))\n\tfor i, log := range logs {\n\t\tswitch log.Type {\n\t\tcase LogCommand:\n\t\t\tm.logs = append(m.logs, log.Data)\n\t\t\tret[i] = len(m.logs)\n\t\tdefault:\n\t\t\tret[i] = nil\n\t\t}\n\t}\n\n\treturn ret\n}\n"
  },
  {
    "path": "transport.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"io\"\n\t\"time\"\n)\n\n// RPCResponse captures both a response and a potential error.\ntype RPCResponse struct {\n\tResponse interface{}\n\tError    error\n}\n\n// RPC has a command, and provides a response mechanism.\ntype RPC struct {\n\tCommand  interface{}\n\tReader   io.Reader // Set only for InstallSnapshot\n\tRespChan chan<- RPCResponse\n}\n\n// Respond is used to respond with a response, error or both\nfunc (r *RPC) Respond(resp interface{}, err error) {\n\tr.RespChan <- RPCResponse{resp, err}\n}\n\n// Transport provides an interface for network transports\n// to allow Raft to communicate with other nodes.\ntype Transport interface {\n\t// Consumer returns a channel that can be used to\n\t// consume and respond to RPC requests.\n\tConsumer() <-chan RPC\n\n\t// LocalAddr is used to return our local address to distinguish from our peers.\n\tLocalAddr() ServerAddress\n\n\t// AppendEntriesPipeline returns an interface that can be used to pipeline\n\t// AppendEntries requests.\n\tAppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error)\n\n\t// AppendEntries sends the appropriate RPC to the target node.\n\tAppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error\n\n\t// RequestVote sends the appropriate RPC to the target node.\n\tRequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error\n\n\t// InstallSnapshot is used to push a snapshot down to a follower. 
The data is read from\n\t// the ReadCloser and streamed to the client.\n\tInstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error\n\n\t// EncodePeer is used to serialize a peer's address.\n\tEncodePeer(id ServerID, addr ServerAddress) []byte\n\n\t// DecodePeer is used to deserialize a peer's address.\n\tDecodePeer([]byte) ServerAddress\n\n\t// SetHeartbeatHandler is used to setup a heartbeat handler\n\t// as a fast-pass. This is to avoid head-of-line blocking from\n\t// disk IO. If a Transport does not support this, it can simply\n\t// ignore the call, and push the heartbeat onto the Consumer channel.\n\tSetHeartbeatHandler(cb func(rpc RPC))\n\n\t// TimeoutNow is used to start a leadership transfer to the target node.\n\tTimeoutNow(id ServerID, target ServerAddress, args *TimeoutNowRequest, resp *TimeoutNowResponse) error\n}\n\n// WithPreVote is an interface that a transport may provide which\n// allows a transport to support a PreVote request.\n//\n// It is defined separately from Transport as unfortunately it wasn't in the\n// original interface specification.\ntype WithPreVote interface {\n\t// RequestPreVote sends the appropriate RPC to the target node.\n\tRequestPreVote(id ServerID, target ServerAddress, args *RequestPreVoteRequest, resp *RequestPreVoteResponse) error\n}\n\n// WithClose is an interface that a transport may provide which\n// allows a transport to be shut down cleanly when a Raft instance\n// shuts down.\n//\n// It is defined separately from Transport as unfortunately it wasn't in the\n// original interface specification.\ntype WithClose interface {\n\t// Close permanently closes a transport, stopping\n\t// any associated goroutines and freeing other resources.\n\tClose() error\n}\n\n// LoopbackTransport is an interface that provides a loopback transport suitable for testing\n// e.g. InmemTransport. 
It's there so we don't have to rewrite tests.\ntype LoopbackTransport interface {\n\tTransport   // Embedded transport reference\n\tWithPeers   // Embedded peer management\n\tWithClose   // with a close routine\n\tWithPreVote // with a prevote\n}\n\n// WithPeers is an interface that a transport may provide which allows for connection and\n// disconnection. Unless the transport is a loopback transport, the transport specified to\n// \"Connect\" is likely to be nil.\ntype WithPeers interface {\n\tConnect(peer ServerAddress, t Transport) // Connect a peer\n\tDisconnect(peer ServerAddress)           // Disconnect a given peer\n\tDisconnectAll()                          // Disconnect all peers, possibly to reconnect them later\n}\n\n// AppendPipeline is used for pipelining AppendEntries requests. It is used\n// to increase the replication throughput by masking latency and better\n// utilizing bandwidth.\ntype AppendPipeline interface {\n\t// AppendEntries is used to add another request to the pipeline.\n\t// The send may block which is an effective form of back-pressure.\n\tAppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)\n\n\t// Consumer returns a channel that can be used to consume\n\t// response futures when they are ready.\n\tConsumer() <-chan AppendFuture\n\n\t// Close closes the pipeline and cancels all inflight RPCs\n\tClose() error\n}\n\n// AppendFuture is used to return information about a pipelined AppendEntries request.\ntype AppendFuture interface {\n\tFuture\n\n\t// Start returns the time that the append request was started.\n\t// It is always OK to call this method.\n\tStart() time.Time\n\n\t// Request holds the parameters of the AppendEntries call.\n\t// It is always OK to call this method.\n\tRequest() *AppendEntriesRequest\n\n\t// Response holds the results of the AppendEntries call.\n\t// This method must only be called after the Error\n\t// method returns, and will only be valid on success.\n\tResponse() 
*AppendEntriesResponse\n}\n"
  },
  {
    "path": "transport_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"reflect\"\n\t\"testing\"\n\t\"time\"\n)\n\nconst (\n\tTTInmem = iota\n\n\t// NOTE: must be last\n\tnumTestTransports\n)\n\nfunc NewTestTransport(ttype int, addr ServerAddress) (ServerAddress, LoopbackTransport) {\n\tswitch ttype {\n\tcase TTInmem:\n\t\treturn NewInmemTransport(addr)\n\tdefault:\n\t\tpanic(\"Unknown transport type\")\n\t}\n}\n\nfunc TestTransport_StartStop(t *testing.T) {\n\tfor ttype := 0; ttype < numTestTransports; ttype++ {\n\t\t_, trans := NewTestTransport(ttype, \"\")\n\t\tif err := trans.Close(); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\t}\n}\n\nfunc TestTransport_AppendEntries(t *testing.T) {\n\tfor ttype := 0; ttype < numTestTransports; ttype++ {\n\t\taddr1, trans1 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make the RPC request\n\t\targs := AppendEntriesRequest{\n\t\t\tTerm:         10,\n\t\t\tPrevLogEntry: 100,\n\t\t\tPrevLogTerm:  4,\n\t\t\tEntries: []*Log{\n\t\t\t\t{\n\t\t\t\t\tIndex: 101,\n\t\t\t\t\tTerm:  4,\n\t\t\t\t\tType:  LogNoop,\n\t\t\t\t},\n\t\t\t},\n\t\t\tLeaderCommitIndex: 90,\n\t\t\tRPCHeader:         RPCHeader{Addr: []byte(\"cartman\")},\n\t\t}\n\n\t\tresp := AppendEntriesResponse{\n\t\t\tTerm:    4,\n\t\t\tLastLog: 90,\n\t\t\tSuccess: true,\n\t\t}\n\n\t\t// Listen for a request\n\t\tgo func() {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\taddr2, trans2 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = 
trans2.Close() }()\n\n\t\ttrans1.Connect(addr2, trans2)\n\t\ttrans2.Connect(addr1, trans1)\n\n\t\tvar out AppendEntriesResponse\n\t\tif err := trans2.AppendEntries(\"id1\", trans1.LocalAddr(), &args, &out); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\t// Verify the response\n\t\tif !reflect.DeepEqual(resp, out) {\n\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", resp, out)\n\t\t}\n\t}\n}\n\nfunc TestTransport_AppendEntriesPipeline(t *testing.T) {\n\tfor ttype := 0; ttype < numTestTransports; ttype++ {\n\t\taddr1, trans1 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make the RPC request\n\t\targs := AppendEntriesRequest{\n\t\t\tTerm:         10,\n\t\t\tPrevLogEntry: 100,\n\t\t\tPrevLogTerm:  4,\n\t\t\tEntries: []*Log{\n\t\t\t\t{\n\t\t\t\t\tIndex: 101,\n\t\t\t\t\tTerm:  4,\n\t\t\t\t\tType:  LogNoop,\n\t\t\t\t},\n\t\t\t},\n\t\t\tLeaderCommitIndex: 90,\n\t\t\tRPCHeader:         RPCHeader{Addr: []byte(\"cartman\")},\n\t\t}\n\n\t\tresp := AppendEntriesResponse{\n\t\t\tTerm:    4,\n\t\t\tLastLog: 90,\n\t\t\tSuccess: true,\n\t\t}\n\n\t\t// Listen for a request\n\t\tgo func() {\n\t\t\tfor i := 0; i < 10; i++ {\n\t\t\t\tselect {\n\t\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t\t// Verify the command\n\t\t\t\t\treq := rpc.Command.(*AppendEntriesRequest)\n\t\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\t\treturn\n\t\t\t\t\t}\n\t\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t\t\treturn\n\t\t\t\t}\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\taddr2, trans2 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans2.Close() }()\n\n\t\ttrans1.Connect(addr2, trans2)\n\t\ttrans2.Connect(addr1, trans1)\n\n\t\tpipeline, err := trans2.AppendEntriesPipeline(\"id1\", trans1.LocalAddr())\n\t\tif err != nil {\n\t\t\tt.Fatalf(\"err: %v\", 
err)\n\t\t}\n\t\tdefer func() { _ = pipeline.Close() }()\n\t\tfor i := 0; i < 10; i++ {\n\t\t\tout := new(AppendEntriesResponse)\n\t\t\tif _, err := pipeline.AppendEntries(&args, out); err != nil {\n\t\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t\t}\n\t\t}\n\n\t\trespCh := pipeline.Consumer()\n\t\tfor i := 0; i < 10; i++ {\n\t\t\tselect {\n\t\t\tcase ready := <-respCh:\n\t\t\t\t// Verify the response\n\t\t\t\tif !reflect.DeepEqual(&resp, ready.Response()) {\n\t\t\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", &resp, ready.Response())\n\t\t\t\t}\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Fatalf(\"timeout\")\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunc TestTransport_RequestVote(t *testing.T) {\n\tfor ttype := 0; ttype < numTestTransports; ttype++ {\n\t\taddr1, trans1 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make the RPC request\n\t\targs := RequestVoteRequest{\n\t\t\tTerm:         20,\n\t\t\tLastLogIndex: 100,\n\t\t\tLastLogTerm:  19,\n\t\t\tRPCHeader:    RPCHeader{Addr: []byte(\"butters\")},\n\t\t}\n\t\tresp := RequestVoteResponse{\n\t\t\tTerm:    100,\n\t\t\tGranted: false,\n\t\t}\n\n\t\t// Listen for a request\n\t\tgo func() {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*RequestVoteRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\taddr2, trans2 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans2.Close() }()\n\n\t\ttrans1.Connect(addr2, trans2)\n\t\ttrans2.Connect(addr1, trans1)\n\n\t\tvar out RequestVoteResponse\n\t\tif err := trans2.RequestVote(\"id1\", trans1.LocalAddr(), &args, &out); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", 
err)\n\t\t}\n\n\t\t// Verify the response\n\t\tif !reflect.DeepEqual(resp, out) {\n\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", resp, out)\n\t\t}\n\t}\n}\n\nfunc TestTransport_InstallSnapshot(t *testing.T) {\n\tfor ttype := 0; ttype < numTestTransports; ttype++ {\n\t\taddr1, trans1 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans1.Close() }()\n\t\trpcCh := trans1.Consumer()\n\n\t\t// Make the RPC request\n\t\targs := InstallSnapshotRequest{\n\t\t\tTerm:         10,\n\t\t\tLastLogIndex: 100,\n\t\t\tLastLogTerm:  9,\n\t\t\tPeers:        []byte(\"blah blah\"),\n\t\t\tSize:         10,\n\t\t\tRPCHeader:    RPCHeader{Addr: []byte(\"kyle\")},\n\t\t}\n\n\t\tresp := InstallSnapshotResponse{\n\t\t\tTerm:    10,\n\t\t\tSuccess: true,\n\t\t}\n\n\t\t// Listen for a request\n\t\tgo func() {\n\t\t\tselect {\n\t\t\tcase rpc := <-rpcCh:\n\t\t\t\t// Verify the command\n\t\t\t\treq := rpc.Command.(*InstallSnapshotRequest)\n\t\t\t\tif !reflect.DeepEqual(req, &args) {\n\t\t\t\t\tt.Errorf(\"command mismatch: %#v %#v\", *req, args)\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\t// Try to read the bytes\n\t\t\t\tbuf := make([]byte, 10)\n\t\t\t\t_, _ = rpc.Reader.Read(buf)\n\n\t\t\t\t// Compare\n\t\t\t\tif !bytes.Equal(buf, []byte(\"0123456789\")) {\n\t\t\t\t\tt.Errorf(\"bad buf %v\", buf)\n\t\t\t\t\treturn\n\t\t\t\t}\n\n\t\t\t\trpc.Respond(&resp, nil)\n\n\t\t\tcase <-time.After(200 * time.Millisecond):\n\t\t\t\tt.Errorf(\"timeout\")\n\t\t\t}\n\t\t}()\n\n\t\t// Transport 2 makes outbound request\n\t\taddr2, trans2 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans2.Close() }()\n\n\t\ttrans1.Connect(addr2, trans2)\n\t\ttrans2.Connect(addr1, trans1)\n\n\t\t// Create a buffer\n\t\tbuf := bytes.NewBuffer([]byte(\"0123456789\"))\n\n\t\tvar out InstallSnapshotResponse\n\t\tif err := trans2.InstallSnapshot(\"id1\", trans1.LocalAddr(), &args, &out, buf); err != nil {\n\t\t\tt.Fatalf(\"err: %v\", err)\n\t\t}\n\n\t\t// Verify the response\n\t\tif !reflect.DeepEqual(resp, out) 
{\n\t\t\tt.Fatalf(\"command mismatch: %#v %#v\", resp, out)\n\t\t}\n\t}\n}\n\nfunc TestTransport_EncodeDecode(t *testing.T) {\n\tfor ttype := 0; ttype < numTestTransports; ttype++ {\n\t\t_, trans1 := NewTestTransport(ttype, \"\")\n\t\tdefer func() { _ = trans1.Close() }()\n\n\t\tlocal := trans1.LocalAddr()\n\t\tenc := trans1.EncodePeer(\"aaaa\", local)\n\t\tdec := trans1.DecodePeer(enc)\n\n\t\tif dec != local {\n\t\t\tt.Fatalf(\"enc/dec fail: %v %v\", dec, local)\n\t\t}\n\t}\n}\n"
  },
  {
    "path": "util.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\tcrand \"crypto/rand\"\n\t\"fmt\"\n\t\"math\"\n\t\"math/big\"\n\t\"math/rand\"\n\t\"time\"\n\n\t\"github.com/hashicorp/go-msgpack/v2/codec\"\n)\n\n// returns an int64 from a crypto random source\n// can be used to seed a source for a math/rand.\nfunc newSeed() int64 {\n\tr, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))\n\tif err != nil {\n\t\tpanic(fmt.Errorf(\"failed to read random bytes: %v\", err))\n\t}\n\treturn r.Int64()\n}\n\n// randomTimeout returns a value that is between the minVal and 2x minVal.\nfunc randomTimeout(minVal time.Duration) <-chan time.Time {\n\tif minVal == 0 {\n\t\treturn nil\n\t}\n\textra := time.Duration(rand.Int63()) % minVal\n\treturn time.After(minVal + extra)\n}\n\n// min returns the minimum.\nfunc min(a, b uint64) uint64 {\n\tif a <= b {\n\t\treturn a\n\t}\n\treturn b\n}\n\n// max returns the maximum.\nfunc max(a, b uint64) uint64 {\n\tif a >= b {\n\t\treturn a\n\t}\n\treturn b\n}\n\n// generateUUID is used to generate a random UUID.\nfunc generateUUID() string {\n\tbuf := make([]byte, 16)\n\tif _, err := crand.Read(buf); err != nil {\n\t\tpanic(fmt.Errorf(\"failed to read random bytes: %v\", err))\n\t}\n\n\treturn fmt.Sprintf(\"%08x-%04x-%04x-%04x-%12x\",\n\t\tbuf[0:4],\n\t\tbuf[4:6],\n\t\tbuf[6:8],\n\t\tbuf[8:10],\n\t\tbuf[10:16])\n}\n\n// asyncNotifyCh is used to do an async channel send\n// to a single channel without blocking.\nfunc asyncNotifyCh(ch chan struct{}) {\n\tselect {\n\tcase ch <- struct{}{}:\n\tdefault:\n\t}\n}\n\n// drainNotifyCh empties out a single-item notification channel without\n// blocking, and returns whether it received anything.\nfunc drainNotifyCh(ch chan struct{}) bool {\n\tselect {\n\tcase <-ch:\n\t\treturn true\n\tdefault:\n\t\treturn false\n\t}\n}\n\n// overrideNotifyBool is used to notify on a bool channel\n// but override existing value if value is 
present.\n// ch must be 1-item buffered channel.\n//\n// This method does not support multiple concurrent calls.\nfunc overrideNotifyBool(ch chan bool, v bool) {\n\tselect {\n\tcase ch <- v:\n\t\t// value sent, all done\n\tcase <-ch:\n\t\t// channel had an old value\n\t\tselect {\n\t\tcase ch <- v:\n\t\tdefault:\n\t\t\tpanic(\"race: channel was sent concurrently\")\n\t\t}\n\t}\n}\n\n// decodeMsgPack reverses the encode operation on a byte slice input.\nfunc decodeMsgPack(buf []byte, out interface{}) error {\n\tr := bytes.NewBuffer(buf)\n\thd := codec.MsgpackHandle{}\n\tdec := codec.NewDecoder(r, &hd)\n\treturn dec.Decode(out)\n}\n\n// encodeMsgPack writes an encoded object to a new bytes buffer.\nfunc encodeMsgPack(in interface{}) (*bytes.Buffer, error) {\n\tbuf := bytes.NewBuffer(nil)\n\thd := codec.MsgpackHandle{}\n\thd.TimeNotBuiltin = true\n\n\tenc := codec.NewEncoder(buf, &hd)\n\terr := enc.Encode(in)\n\treturn buf, err\n}\n\n// backoff is used to compute an exponential backoff\n// duration. Base time is scaled by the current round,\n// up to some maximum scale factor.\nfunc backoff(base time.Duration, round, limit uint64) time.Duration {\n\tpower := min(round, limit)\n\tfor power > 2 {\n\t\tbase *= 2\n\t\tpower--\n\t}\n\treturn base\n}\n\n// cappedExponentialBackoff computes the exponential backoff with an adjustable\n// cap on the max timeout.\nfunc cappedExponentialBackoff(base time.Duration, round, limit uint64, cap time.Duration) time.Duration {\n\tpower := min(round, limit)\n\tfor power > 2 {\n\t\tif base > cap {\n\t\t\treturn cap\n\t\t}\n\t\tbase *= 2\n\t\tpower--\n\t}\n\tif base > cap {\n\t\treturn cap\n\t}\n\treturn base\n}\n\n// Needed for sorting []uint64, used to determine commitment\ntype uint64Slice []uint64\n\nfunc (p uint64Slice) Len() int           { return len(p) }\nfunc (p uint64Slice) Less(i, j int) bool { return p[i] < p[j] }\nfunc (p uint64Slice) Swap(i, j int)      { p[i], p[j] = p[j], p[i] }\n"
  },
  {
    "path": "util_test.go",
    "content": "// Copyright IBM Corp. 2013, 2025\n// SPDX-License-Identifier: MPL-2.0\n\npackage raft\n\nimport (\n\t\"bytes\"\n\t\"regexp\"\n\t\"testing\"\n\t\"time\"\n)\n\n// TestMsgpackEncodeTime ensures that we don't break backwards compatibility when updating go-msgpack with\n// Raft binary formats.\nfunc TestMsgpackEncodeTimeDefaultFormat(t *testing.T) {\n\tstamp := \"2006-01-02T15:04:05Z\"\n\ttm, err := time.Parse(time.RFC3339, stamp)\n\tif err != nil {\n\t\tt.Fatal(err)\n\t}\n\tbuf, err := encodeMsgPack(tm)\n\tif err != nil {\n\t\tt.Errorf(\"Failed to encode time %s: %v\", stamp, err)\n\t}\n\n\texpected := []byte{175, 1, 0, 0, 0, 14, 187, 75, 55, 229, 0, 0, 0, 0, 255, 255}\n\n\tif !bytes.Equal(buf.Bytes(), expected) {\n\t\tt.Errorf(\"Expected time %s to encode as %+v but got %+v\", stamp, expected, buf.Bytes())\n\t}\n}\n\nfunc TestRandomTimeout(t *testing.T) {\n\tstart := time.Now()\n\ttimeout := randomTimeout(time.Millisecond)\n\n\tselect {\n\tcase <-timeout:\n\t\tdiff := time.Since(start)\n\t\tif diff < time.Millisecond {\n\t\t\tt.Fatalf(\"fired early\")\n\t\t}\n\tcase <-time.After(3 * time.Millisecond):\n\t\tt.Fatalf(\"timeout\")\n\t}\n}\n\nfunc TestNewSeed(t *testing.T) {\n\tvals := make(map[int64]bool)\n\tfor i := 0; i < 1000; i++ {\n\t\tseed := newSeed()\n\t\tif _, exists := vals[seed]; exists {\n\t\t\tt.Fatal(\"newSeed() return a value it'd previously returned\")\n\t\t}\n\t\tvals[seed] = true\n\t}\n}\n\nfunc TestRandomTimeout_NoTime(t *testing.T) {\n\ttimeout := randomTimeout(0)\n\tif timeout != nil {\n\t\tt.Fatalf(\"expected nil channel\")\n\t}\n}\n\nfunc TestMin(t *testing.T) {\n\tif min(1, 1) != 1 {\n\t\tt.Fatalf(\"bad min\")\n\t}\n\tif min(2, 1) != 1 {\n\t\tt.Fatalf(\"bad min\")\n\t}\n\tif min(1, 2) != 1 {\n\t\tt.Fatalf(\"bad min\")\n\t}\n}\n\nfunc TestMax(t *testing.T) {\n\tif max(1, 1) != 1 {\n\t\tt.Fatalf(\"bad max\")\n\t}\n\tif max(2, 1) != 2 {\n\t\tt.Fatalf(\"bad max\")\n\t}\n\tif max(1, 2) != 2 {\n\t\tt.Fatalf(\"bad max\")\n\t}\n}\n\nfunc 
TestGenerateUUID(t *testing.T) {\n\tprev := generateUUID()\n\tre, err := regexp.Compile(`[\\da-f]{8}-[\\da-f]{4}-[\\da-f]{4}-[\\da-f]{4}-[\\da-f]{12}`)\n\tif err != nil {\n\t\tt.Errorf(\"failed to compile regex: %s\", err)\n\t}\n\n\tfor i := 0; i < 100; i++ {\n\t\tid := generateUUID()\n\t\tif prev == id {\n\t\t\tt.Fatalf(\"Should get a new ID!\")\n\t\t}\n\n\t\tmatched := re.MatchString(id)\n\t\tif !matched {\n\t\t\tt.Fatalf(\"expected match %s %v\", id, matched)\n\t\t}\n\t}\n}\n\nfunc TestBackoff(t *testing.T) {\n\tb := backoff(10*time.Millisecond, 1, 8)\n\tif b != 10*time.Millisecond {\n\t\tt.Fatalf(\"bad: %v\", b)\n\t}\n\n\tb = backoff(20*time.Millisecond, 2, 8)\n\tif b != 20*time.Millisecond {\n\t\tt.Fatalf(\"bad: %v\", b)\n\t}\n\n\tb = backoff(10*time.Millisecond, 8, 8)\n\tif b != 640*time.Millisecond {\n\t\tt.Fatalf(\"bad: %v\", b)\n\t}\n\n\tb = backoff(10*time.Millisecond, 9, 8)\n\tif b != 640*time.Millisecond {\n\t\tt.Fatalf(\"bad: %v\", b)\n\t}\n}\n\nfunc TestOverrideNotifyBool(t *testing.T) {\n\tch := make(chan bool, 1)\n\n\t// sanity check - buffered channel doesn't have any values\n\tselect {\n\tcase v := <-ch:\n\t\tt.Fatalf(\"unexpected receive: %v\", v)\n\tdefault:\n\t}\n\n\t// simple case of a single push\n\toverrideNotifyBool(ch, false)\n\tselect {\n\tcase v := <-ch:\n\t\tif v != false {\n\t\t\tt.Fatalf(\"expected false but got %v\", v)\n\t\t}\n\tdefault:\n\t\tt.Fatalf(\"expected a value but is not ready\")\n\t}\n\n\t// assert that function never blocks and only last item is received\n\toverrideNotifyBool(ch, false)\n\toverrideNotifyBool(ch, false)\n\toverrideNotifyBool(ch, false)\n\toverrideNotifyBool(ch, false)\n\toverrideNotifyBool(ch, true)\n\n\tselect {\n\tcase v := <-ch:\n\t\tif v != true {\n\t\t\tt.Fatalf(\"expected true but got %v\", v)\n\t\t}\n\tdefault:\n\t\tt.Fatalf(\"expected a value but is not ready\")\n\t}\n\n\t// no further value is available\n\tselect {\n\tcase v := <-ch:\n\t\tt.Fatalf(\"unexpected receive: %v\", 
v)\n\tdefault:\n\t}\n}\n"
  }
]